From 61b8c771ed906720459b029d91f97c7df0785938 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Tue, 13 Feb 2018 21:42:32 +0100 Subject: Added skeleton for Python interface using Cython --- src/pyclblast/pyclblast/pyclblast.pyx | 375 ++++++++++++++++++++++++++++++++++ src/pyclblast/setup.py | 36 ++++ 2 files changed, 411 insertions(+) create mode 100644 src/pyclblast/pyclblast/pyclblast.pyx create mode 100644 src/pyclblast/setup.py diff --git a/src/pyclblast/pyclblast/pyclblast.pyx b/src/pyclblast/pyclblast/pyclblast.pyx new file mode 100644 index 00000000..0cc3b237 --- /dev/null +++ b/src/pyclblast/pyclblast/pyclblast.pyx @@ -0,0 +1,375 @@ + +#################################################################################################### +# This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. +# This file follows uses a max-width of 100 characters per line. +# +# Author(s): +# Cedric Nugteren +# +# This file defines the Python interface to CLBlast. 
It is inspired by: +# https://github.com/hunse/pyopencl_blas +# +#################################################################################################### + +import numpy as np +import pyopencl as cl +from pyopencl.array import Array + +from libcpp cimport bool + +#################################################################################################### +# CLBlast and OpenCL data-types +#################################################################################################### + +cdef extern from "clblast_c.h": + + # Status codes + ctypedef enum CLBlastStatusCode: + CLBlastSuccess + CLBlastOpenCLCompilerNotAvailable + CLBlastTempBufferAllocFailure + CLBlastOpenCLOutOfResources + CLBlastOpenCLOutOfHostMemory + CLBlastOpenCLBuildProgramFailure + CLBlastInvalidValue + CLBlastInvalidCommandQueue + CLBlastInvalidMemObject + CLBlastInvalidBinary + CLBlastInvalidBuildOptions + CLBlastInvalidProgram + CLBlastInvalidProgramExecutable + CLBlastInvalidKernelName + CLBlastInvalidKernelDefinition + CLBlastInvalidKernel + CLBlastInvalidArgIndex + CLBlastInvalidArgValue + CLBlastInvalidArgSize + CLBlastInvalidKernelArgs + CLBlastInvalidLocalNumDimensions + CLBlastInvalidLocalThreadsTotal + CLBlastInvalidLocalThreadsDim + CLBlastInvalidGlobalOffset + CLBlastInvalidEventWaitList + CLBlastInvalidEvent + CLBlastInvalidOperation + CLBlastInvalidBufferSize + CLBlastInvalidGlobalWorkSize + CLBlastNotImplemented + CLBlastInvalidMatrixA + CLBlastInvalidMatrixB + CLBlastInvalidMatrixC + CLBlastInvalidVectorX + CLBlastInvalidVectorY + CLBlastInvalidDimension + CLBlastInvalidLeadDimA + CLBlastInvalidLeadDimB + CLBlastInvalidLeadDimC + CLBlastInvalidIncrementX + CLBlastInvalidIncrementY + CLBlastInsufficientMemoryA + CLBlastInsufficientMemoryB + CLBlastInsufficientMemoryC + CLBlastInsufficientMemoryX + CLBlastInsufficientMemoryY + CLBlastInvalidBatchCount + CLBlastInvalidOverrideKernel + CLBlastMissingOverrideParameter + CLBlastInvalidLocalMemUsage + 
CLBlastNoHalfPrecision + CLBlastNoDoublePrecision + CLBlastInvalidVectorScalar + CLBlastInsufficientMemoryScalar + CLBlastDatabaseError + CLBlastUnknownError + CLBlastUnexpectedError + + # OpenCL data-types + ctypedef float cl_float + ctypedef double cl_double + ctypedef unsigned int cl_uint + ctypedef struct cl_float2: + cl_float x + cl_float y + ctypedef struct cl_double2: + cl_double x + cl_double y + + # OpenCL special data-types + struct _cl_mem: + pass + struct _cl_command_queue: + pass + struct _cl_event: + pass + ctypedef _cl_mem* cl_mem + ctypedef _cl_command_queue* cl_command_queue + ctypedef _cl_event* cl_event + + # Matrix layout and transpose types + ctypedef enum CLBlastLayout: + CLBlastLayoutRowMajor + CLBlastLayoutColMajor + ctypedef enum CLBlastTranspose: + CLBlastTransposeNo + CLBlastTransposeYes + CLBlastTransposeConjugate + ctypedef enum CLBlastTriangle: + CLBlastTriangleUpper + CLBlastTriangleLower + ctypedef enum CLBlastDiagonal: + CLBlastDiagonalNonUnit + CLBlastDiagonalUnit + ctypedef enum CLBlastSide: + CLBlastSideLeft + CLBlastSideRight + + # Precision enum + ctypedef enum CLBlastPrecision: + CLBlastPrecisionSingle + CLBlastPrecisionDouble + CLBlastPrecisionComplexSingle + CLBlastPrecisionComplexDouble + +# Translates status codes into readable messages +cdef get_status_message(CLBlastStatusCode status): + if status == CLBlastSuccess: + return "CLBlastSuccess" + if status == CLBlastOpenCLCompilerNotAvailable: + return "CLBlastOpenCLCompilerNotAvailable: CL_COMPILER_NOT_AVAILABLE" + if status == CLBlastTempBufferAllocFailure: + return "CLBlastTempBufferAllocFailure: CL_MEM_OBJECT_ALLOCATION_FAILURE" + if status == CLBlastOpenCLOutOfResources: + return "CLBlastOpenCLOutOfResources: CL_OUT_OF_RESOURCES" + if status == CLBlastOpenCLOutOfHostMemory: + return "CLBlastOpenCLOutOfHostMemory: CL_OUT_OF_HOST_MEMORY" + if status == CLBlastOpenCLBuildProgramFailure: + return "CLBlastOpenCLBuildProgramFailure: CL_BUILD_PROGRAM_FAILURE: OpenCL 
compilation error" + if status == CLBlastInvalidValue: + return "CLBlastInvalidValue: CL_INVALID_VALUE" + if status == CLBlastInvalidCommandQueue: + return "CLBlastInvalidCommandQueue: CL_INVALID_COMMAND_QUEUE" + if status == CLBlastInvalidMemObject: + return "CLBlastInvalidMemObject: CL_INVALID_MEM_OBJECT" + if status == CLBlastInvalidBinary: + return "CLBlastInvalidBinary: CL_INVALID_BINARY" + if status == CLBlastInvalidBuildOptions: + return "CLBlastInvalidBuildOptions: CL_INVALID_BUILD_OPTIONS" + if status == CLBlastInvalidProgram: + return "CLBlastInvalidProgram: CL_INVALID_PROGRAM" + if status == CLBlastInvalidProgramExecutable: + return "CLBlastInvalidProgramExecutable: CL_INVALID_PROGRAM_EXECUTABLE" + if status == CLBlastInvalidKernelName: + return "CLBlastInvalidKernelName: CL_INVALID_KERNEL_NAME" + if status == CLBlastInvalidKernelDefinition: + return "CLBlastInvalidKernelDefinition: CL_INVALID_KERNEL_DEFINITION" + if status == CLBlastInvalidKernel: + return "CLBlastInvalidKernel: CL_INVALID_KERNEL" + if status == CLBlastInvalidArgIndex: + return "CLBlastInvalidArgIndex: CL_INVALID_ARG_INDEX" + if status == CLBlastInvalidArgValue: + return "CLBlastInvalidArgValue: CL_INVALID_ARG_VALUE" + if status == CLBlastInvalidArgSize: + return "CLBlastInvalidArgSize: CL_INVALID_ARG_SIZE" + if status == CLBlastInvalidKernelArgs: + return "CLBlastInvalidKernelArgs: CL_INVALID_KERNEL_ARGS" + if status == CLBlastInvalidLocalNumDimensions: + return "CLBlastInvalidLocalNumDimensions: CL_INVALID_WORK_DIMENSION: Too many thread dimensions" + if status == CLBlastInvalidLocalThreadsTotal: + return "CLBlastInvalidLocalThreadsTotal: CL_INVALID_WORK_GROUP_SIZE: Too many threads in total" + if status == CLBlastInvalidLocalThreadsDim: + return "CLBlastInvalidLocalThreadsDim: CL_INVALID_WORK_ITEM_SIZE: ... 
or for a specific dimension" + if status == CLBlastInvalidGlobalOffset: + return "CLBlastInvalidGlobalOffset: CL_INVALID_GLOBAL_OFFSET" + if status == CLBlastInvalidEventWaitList: + return "CLBlastInvalidEventWaitList: CL_INVALID_EVENT_WAIT_LIST" + if status == CLBlastInvalidEvent: + return "CLBlastInvalidEvent: CL_INVALID_EVENT" + if status == CLBlastInvalidOperation: + return "CLBlastInvalidOperation: CL_INVALID_OPERATION" + if status == CLBlastInvalidBufferSize: + return "CLBlastInvalidBufferSize: CL_INVALID_BUFFER_SIZE" + if status == CLBlastInvalidGlobalWorkSize: + return "CLBlastInvalidGlobalWorkSize: CL_INVALID_GLOBAL_WORK_SIZE" + if status == CLBlastNotImplemented: + return "CLBlastNotImplemented: Routine or functionality not implemented yet" + if status == CLBlastInvalidMatrixA: + return "CLBlastInvalidMatrixA: Matrix A is not a valid OpenCL buffer" + if status == CLBlastInvalidMatrixB: + return "CLBlastInvalidMatrixB: Matrix B is not a valid OpenCL buffer" + if status == CLBlastInvalidMatrixC: + return "CLBlastInvalidMatrixC: Matrix C is not a valid OpenCL buffer" + if status == CLBlastInvalidVectorX: + return "CLBlastInvalidVectorX: Vector X is not a valid OpenCL buffer" + if status == CLBlastInvalidVectorY: + return "CLBlastInvalidVectorY: Vector Y is not a valid OpenCL buffer" + if status == CLBlastInvalidDimension: + return "CLBlastInvalidDimension: Dimensions M, N, and K have to be larger than zero" + if status == CLBlastInvalidLeadDimA: + return "CLBlastInvalidLeadDimA: LD of A is smaller than the matrix's first dimension" + if status == CLBlastInvalidLeadDimB: + return "CLBlastInvalidLeadDimB: LD of B is smaller than the matrix's first dimension" + if status == CLBlastInvalidLeadDimC: + return "CLBlastInvalidLeadDimC: LD of C is smaller than the matrix's first dimension" + if status == CLBlastInvalidIncrementX: + return "CLBlastInvalidIncrementX: Increment of vector X cannot be zero" + if status == CLBlastInvalidIncrementY: + return 
"CLBlastInvalidIncrementY: Increment of vector Y cannot be zero" + if status == CLBlastInsufficientMemoryA: + return "CLBlastInsufficientMemoryA: Matrix A's OpenCL buffer is too small" + if status == CLBlastInsufficientMemoryB: + return "CLBlastInsufficientMemoryB: Matrix B's OpenCL buffer is too small" + if status == CLBlastInsufficientMemoryC: + return "CLBlastInsufficientMemoryC: Matrix C's OpenCL buffer is too small" + if status == CLBlastInsufficientMemoryX: + return "CLBlastInsufficientMemoryX: Vector X's OpenCL buffer is too small" + if status == CLBlastInsufficientMemoryY: + return "CLBlastInsufficientMemoryY: Vector Y's OpenCL buffer is too small" + if status == CLBlastInvalidBatchCount: + return "CLBlastInvalidBatchCount: The batch count needs to be positive" + if status == CLBlastInvalidOverrideKernel: + return "CLBlastInvalidOverrideKernel: Trying to override parameters for an invalid kernel" + if status == CLBlastMissingOverrideParameter: + return "CLBlastMissingOverrideParameter: Missing override parameter(s) for the target kernel" + if status == CLBlastInvalidLocalMemUsage: + return "CLBlastInvalidLocalMemUsage: Not enough local memory available on this device" + if status == CLBlastNoHalfPrecision: + return "CLBlastNoHalfPrecision: Half precision (16-bits) not supported by the device" + if status == CLBlastNoDoublePrecision: + return "CLBlastNoDoublePrecision: Double precision (64-bits) not supported by the device" + if status == CLBlastInvalidVectorScalar: + return "CLBlastInvalidVectorScalar: The unit-sized vector is not a valid OpenCL buffer" + if status == CLBlastInsufficientMemoryScalar: + return "CLBlastInsufficientMemoryScalar: The unit-sized vector's OpenCL buffer is too small" + if status == CLBlastDatabaseError: + return "CLBlastDatabaseError: Entry for the device was not found in the database" + if status == CLBlastUnknownError: + return "CLBlastUnknownError: A catch-all error code representing an unspecified error" + if status == 
CLBlastUnexpectedError: + return "CLBlastUnexpectedError: A catch-all error code representing an unexpected exception" + return "PyCLBlast: unrecognized CLBlast status code (code %d)" % status + +#################################################################################################### +# Generic helpers +#################################################################################################### + +dtype_size = {np.dtype('float32'): 4, + np.dtype('float64'): 8, + np.dtype('complex64'): 8, + np.dtype('complex128'): 16} + +def dtypes_str(dtypes): + if len(dtypes) == 1: + return "'%s'" % dtypes[0] + return "one of %s" % dtypes + + +def check_dtype(args, dtypes): + dtype = args[0].dtype + if not all(arg.dtype == dtype for arg in args): + raise ValueError("PyCLBlast: All arguments must have the same dtype (%s)" % dtypes_str(dtypes)) + if dtype not in dtypes: + raise ValueError("PyCLBlast: Data type must be %s" % dtypes_str(dtypes)) + return dtype + + +def check_array(a, ndim, name): + if not isinstance(a, Array): + raise ValueError("PyCLBlast: '%s' must be a PyOpenCL Array" % name) + if not len(a.shape) == ndim: + raise ValueError("PyCLBlast: '%s' must have %d dimensions (got %d)" % (name, ndim, len(a.shape))) + + +def check_matrix(a, name): + check_array(a, 2, name) + + +def check_vector(a, name): + check_array(a, 1, name) + + +def check_shape_dim(shape, dim, target, name): + if shape[dim] != target: + raise ValueError("PyCLBlast: '%s.shape[%d]' must be %d (got %d)" % (name, dim, target, shape[dim])) + +#################################################################################################### +# Swap two vectors: SSWAP/DSWAP/CSWAP/ZSWAP/HSWAP +#################################################################################################### + +cdef extern from "clblast_c.h": + CLBlastStatusCode CLBlastSswap( + const size_t n, + cl_mem x_buffer, + const size_t x_offset, + const size_t x_inc, + cl_mem y_buffer, + const size_t y_offset, + 
const size_t y_inc, + cl_command_queue* queue, + cl_event* event) + CLBlastStatusCode CLBlastDswap( + const size_t n, + cl_mem x_buffer, + const size_t x_offset, + const size_t x_inc, + cl_mem y_buffer, + const size_t y_offset, + const size_t y_inc, + cl_command_queue* queue, + cl_event* event) + CLBlastStatusCode CLBlastCswap( + const size_t n, + cl_mem x_buffer, + const size_t x_offset, + const size_t x_inc, + cl_mem y_buffer, + const size_t y_offset, + const size_t y_inc, + cl_command_queue* queue, + cl_event* event) + CLBlastStatusCode CLBlastZswap( + const size_t n, + cl_mem x_buffer, + const size_t x_offset, + const size_t x_inc, + cl_mem y_buffer, + const size_t y_offset, + const size_t y_inc, + cl_command_queue* queue, + cl_event* event) + +def swap(queue, x, y): + """y, x = x, y""" + dtype = check_dtype([x, y], ["float32", "float64", "complex64", "complex128"]) + check_vector(x, "x") + check_vector(y, "y") + + cdef size_t N = x.shape[0] + check_shape_dim(y.shape, 0, N, "y") + + cdef size_t element_size = dtype_size[dtype] + cdef cl_mem xdata = x.base_data.int_ptr + cdef size_t offx = x.offset / element_size + cdef int incx = x.strides[0] / element_size + cdef cl_mem ydata = y.base_data.int_ptr + cdef size_t offy = y.offset / element_size + cdef int incy = y.strides[0] / element_size + + cdef cl_command_queue commandQueue = queue.int_ptr + cdef cl_event event = NULL + + cdef CLBlastStatusCode + if dtype == np.dtype("float32"): + err = CLBlastSswap(N, xdata, offx, incx, ydata, offy, incy, &commandQueue, &event) + elif dtype == np.dtype("float64"): + err = CLBlastDswap(N, xdata, offx, incx, ydata, offy, incy, &commandQueue, &event) + elif dtype == np.dtype("complex64"): + err = CLBlastCswap(N, xdata, offx, incx, ydata, offy, incy, &commandQueue, &event) + elif dtype == np.dtype("complex128"): + err = CLBlastZswap(N, xdata, offx, incx, ydata, offy, incy, &commandQueue, &event) + else: + raise ValueError("PyCLBlast: Unrecognized dtype '%s'" % dtype) + if err != 
CLBlastSuccess: + raise RuntimeError("PyCLBlast: 'CLBlastXswap' failed: %s" % get_status_message(err)) + return cl.Event.from_int_ptr(event) + +#################################################################################################### diff --git a/src/pyclblast/setup.py b/src/pyclblast/setup.py new file mode 100644 index 00000000..2a90f16d --- /dev/null +++ b/src/pyclblast/setup.py @@ -0,0 +1,36 @@ + +# This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. +# This file follows the PEP8 Python style guide and uses a max-width of 100 characters per line. +# +# Author(s): +# Cedric Nugteren + +from setuptools import setup + +from distutils.extension import Extension +from Cython.Distutils import build_ext + +ext_modules = list() +ext_modules.append( + Extension( + "pyclblast", + ["pyclblast/pyclblast.pyx"], + libraries=["clblast"], + language="c++" + ) +) + +setup( + name="pyclblast", + version="1.3.0", + author="Cedric Nugteren", + author_email="web@cedricnugteren.nl", + url="https://github.com/cnugteren/clblast", + description="Python bindings for CLBlast, the tuned OpenCL BLAS library", + license="ApacheV2", + requires=["pyopencl","cython"], + packages=["pyclblast"], + scripts=[], + ext_modules=ext_modules, + cmdclass={"build_ext": build_ext}, +) -- cgit v1.2.3 From eb85f6b514b285d7dde1ac02b97b7581a46ff21d Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Wed, 14 Feb 2018 20:50:47 +0100 Subject: First generated version (clblastXswap only for now) of the pyclblast wrapper --- scripts/generator/generator.py | 12 +++-- scripts/generator/generator/convert.py | 13 +++++ scripts/generator/generator/pyclblast.py | 81 ++++++++++++++++++++++++++++++++ scripts/generator/generator/routine.py | 19 ++++++++ src/pyclblast/pyclblast/pyclblast.pyx | 78 ++++++------------------------ 5 files changed, 136 insertions(+), 67 deletions(-) create mode 100644 scripts/generator/generator/pyclblast.py diff --git 
a/scripts/generator/generator.py b/scripts/generator/generator.py index 955625f5..c25d0e4f 100755 --- a/scripts/generator/generator.py +++ b/scripts/generator/generator.py @@ -18,6 +18,7 @@ # clblast_netlib_c.cpp # wrapper_clblas.h # wrapper_cblas.h +# pyclblast.pyx # It also generates the main functions for the correctness and performance tests as found in # test/correctness/routines/levelX/xYYYY.cpp # test/performance/routines/levelX/xYYYY.cpp @@ -30,6 +31,7 @@ import argparse import generator.cpp as cpp import generator.doc as doc +import generator.pyclblast as pyclblast from generator.routine import Routine from generator.datatype import H, S, D, C, Z, Sc, Dz, iH, iS, iD, iC, iZ, Css, Zdd, Ccs, Zzd, T, Tc, TU @@ -45,9 +47,10 @@ FILES = [ "/src/clblast_netlib_c.cpp", "/include/clblast_cuda.h", "/src/clblast_cuda.cpp", + "/src/pyclblast/pyclblast/pyclblast.pyx" ] -HEADER_LINES = [123, 21, 126, 24, 29, 41, 29, 65, 32, 95, 21] -FOOTER_LINES = [41, 56, 27, 38, 6, 6, 6, 9, 2, 41, 55] +HEADER_LINES = [123, 21, 126, 24, 29, 41, 29, 65, 32, 95, 21, 288] +FOOTER_LINES = [41, 56, 27, 38, 6, 6, 6, 9, 2, 41, 55, 1] HEADER_LINES_DOC = 0 FOOTER_LINES_DOC = 63 @@ -209,7 +212,8 @@ def main(argv): body = "" levels = [1, 2, 3] if (i == 4 or i == 5 or i == 6) else [1, 2, 3, 4] for level in levels: - body += cpp.LEVEL_SEPARATORS[level - 1] + "\n" + if i not in [11]: + body += cpp.LEVEL_SEPARATORS[level - 1] + "\n" for routine in ROUTINES[level - 1]: if i == 0: body += cpp.clblast_h(routine) @@ -235,6 +239,8 @@ def main(argv): body += cpp.clblast_h(routine, cuda=True) if i == 10: body += cpp.clblast_cc(routine, cuda=True) + if i == 11: + body += pyclblast.generate_pyx(routine) f.write("".join(file_header)) f.write(body) f.write("".join(file_footer)) diff --git a/scripts/generator/generator/convert.py b/scripts/generator/generator/convert.py index 07f45669..44eb69d6 100644 --- a/scripts/generator/generator/convert.py +++ b/scripts/generator/generator/convert.py @@ -80,3 +80,16 @@ def 
option_to_documentation(x): 'triangle': "The part of the array of the triangular matrix to be used, either `Triangle::kUpper` (121) or `Triangle::kLower` (122).", 'diagonal': "The property of the diagonal matrix, either `Diagonal::kNonUnit` (131) for non-unit values on the diagonal or `Diagonal::kUnit` (132) for unit values on the diagonal.", }[x] + + +def option_to_clblastdefault(x): + """Translates an option name to a CLBlast C default type""" + return { + 'layout': "CLBlastLayoutColMajor", + 'a_transpose': "CLBlastTransposeNo", + 'b_transpose': "CLBlastTransposeNo", + 'ab_transpose': "CLBlastTransposeNo", + 'side': "CLBlastSideLeft", + 'triangle': "CLBlastTriangleUpper", + 'diagonal': "CLBlastDiagonalNonUnit", + }[x] diff --git a/scripts/generator/generator/pyclblast.py b/scripts/generator/generator/pyclblast.py new file mode 100644 index 00000000..089a410a --- /dev/null +++ b/scripts/generator/generator/pyclblast.py @@ -0,0 +1,81 @@ + +# This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This file follows the +# PEP8 Python style guide and uses a max-width of 120 characters per line. 
+# +# Author(s): +# Cedric Nugteren + +NL = "\n" +SEPARATOR = "####################################################################################################" + + +def to_np_dtype(flavour): + if flavour.precision_name == "S": + return "float32" + if flavour.precision_name == "D": + return "float64" + if flavour.precision_name == "C": + return "complex64" + if flavour.precision_name == "Z": + return "complex128" + raise RuntimeError("Could not convert flavour '%s' to numpy" % flavour.precision_name) + + +def generate_pyx(routine): + result = "" + if routine.implemented and routine.plain_name() == "swap": # TODO: Generalize + + result += SEPARATOR + NL + result += "# " + routine.description + ": " + routine.short_names() + NL + result += SEPARATOR + NL + result += NL + + result += "cdef extern from \"clblast_c.h\":" + NL + np_dtypes = [] + for flavour in routine.flavours: + if flavour.precision_name in ["S", "D", "C", "Z"]: + result += " CLBlastStatusCode CLBlast" + flavour.name + routine.plain_name() + "(" + result += ", ".join(routine.arguments_def_c(flavour)) + "," + result += "cl_command_queue* queue, cl_event* event)" + NL + np_dtypes.append(to_np_dtype(flavour)) + result += "" + NL + + buffers = routine.inputs[:] + routine.outputs[:] + result += "def " + routine.plain_name() + "(queue, " + result += ", ".join(routine.arguments_python()) + "):" + NL + result += " dtype = check_dtype([" + ", ".join(buffers) + "], " + result += "[" + ", ".join(['"%s"' % d for d in np_dtypes]) + "])" + NL + for buf in buffers: + if buf in routine.buffers_vector(): + result += " check_vector(" + else: + result += " check_matrix(" + result += buf + ", \"" + buf + "\")" + NL + result += "" + NL + + for buf in buffers: + result += " cdef cl_mem " + buf + "_buffer = " + buf + ".base_data.int_ptr" + NL + result += "" + NL + + result += " cdef cl_command_queue command_queue = queue.int_ptr" + NL + result += " cdef cl_event event = NULL" + NL + result += "" + NL + + result += " cdef 
CLBlastStatusCode err" + NL + if_prefix = "" + for flavour in routine.flavours: + if flavour.precision_name in ["S", "D", "C", "Z"]: + np_dtype = to_np_dtype(flavour) + result += " " + if_prefix + "if dtype == np.dtype(\"" + np_dtype + "\"):" + NL + result += " err = CLBlast" + flavour.name + routine.plain_name() + result += "(" + ", ".join(routine.arguments()) + ", &command_queue, &event)" + NL + if_prefix = "el" + + result += " else:" + NL + result += " raise ValueError(\"PyCLBlast: Unrecognized data-type '%s'\" % dtype)" + NL + result += " if err != CLBlastSuccess:" + NL + result += " raise RuntimeError(\"PyCLBlast: 'CLBlastX" + routine.plain_name() + "' failed: %s\" % get_status_message(err))" + NL + result += " return cl.Event.from_int_ptr(event)" + NL + result += NL + + return result diff --git a/scripts/generator/generator/routine.py b/scripts/generator/generator/routine.py index 052709ee..d0b0a6d7 100644 --- a/scripts/generator/generator/routine.py +++ b/scripts/generator/generator/routine.py @@ -815,6 +815,25 @@ class Routine: list(chain(*[self.scalar_doc(s) for s in self.other_scalars()])) + self.batch_count_doc()) + def arguments_python(self): + """Arguments for the Python wrapper pyclblast""" + result = list() + result.extend(self.sizes) + buffers = self.inputs + self.outputs + result.extend(buffers[:]) + for buf in buffers: + if buf in self.buffers_matrix(): + result.append(buf + "_ld") + for buf in buffers: + if buf in self.buffers_vector(): + result.append(buf + "_inc = 1") + for option in self.options: + default = convert.option_to_clblastdefault(option) + result.append(option + " = " + default) + for buf in buffers: + result.append(buf + "_offset = 0") + return result + def requirements_doc(self): """Retrieves a list of routine requirements for documentation""" return self.requirements diff --git a/src/pyclblast/pyclblast/pyclblast.pyx b/src/pyclblast/pyclblast/pyclblast.pyx index 0cc3b237..a090d367 100644 --- 
a/src/pyclblast/pyclblast/pyclblast.pyx +++ b/src/pyclblast/pyclblast/pyclblast.pyx @@ -1,7 +1,6 @@ #################################################################################################### # This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. -# This file follows uses a max-width of 100 characters per line. # # Author(s): # Cedric Nugteren @@ -287,87 +286,38 @@ def check_vector(a, name): check_array(a, 1, name) -def check_shape_dim(shape, dim, target, name): - if shape[dim] != target: - raise ValueError("PyCLBlast: '%s.shape[%d]' must be %d (got %d)" % (name, dim, target, shape[dim])) - #################################################################################################### # Swap two vectors: SSWAP/DSWAP/CSWAP/ZSWAP/HSWAP #################################################################################################### cdef extern from "clblast_c.h": - CLBlastStatusCode CLBlastSswap( - const size_t n, - cl_mem x_buffer, - const size_t x_offset, - const size_t x_inc, - cl_mem y_buffer, - const size_t y_offset, - const size_t y_inc, - cl_command_queue* queue, - cl_event* event) - CLBlastStatusCode CLBlastDswap( - const size_t n, - cl_mem x_buffer, - const size_t x_offset, - const size_t x_inc, - cl_mem y_buffer, - const size_t y_offset, - const size_t y_inc, - cl_command_queue* queue, - cl_event* event) - CLBlastStatusCode CLBlastCswap( - const size_t n, - cl_mem x_buffer, - const size_t x_offset, - const size_t x_inc, - cl_mem y_buffer, - const size_t y_offset, - const size_t y_inc, - cl_command_queue* queue, - cl_event* event) - CLBlastStatusCode CLBlastZswap( - const size_t n, - cl_mem x_buffer, - const size_t x_offset, - const size_t x_inc, - cl_mem y_buffer, - const size_t y_offset, - const size_t y_inc, - cl_command_queue* queue, - cl_event* event) + CLBlastStatusCode CLBlastSswap(const size_t n, cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, 
const size_t y_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastDswap(const size_t n, cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastCswap(const size_t n, cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastZswap(const size_t n, cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) -def swap(queue, x, y): - """y, x = x, y""" +def swap(queue, n, x, y, x_inc = 1, y_inc = 1, x_offset = 0, y_offset = 0): dtype = check_dtype([x, y], ["float32", "float64", "complex64", "complex128"]) check_vector(x, "x") check_vector(y, "y") - cdef size_t N = x.shape[0] - check_shape_dim(y.shape, 0, N, "y") - - cdef size_t element_size = dtype_size[dtype] - cdef cl_mem xdata = x.base_data.int_ptr - cdef size_t offx = x.offset / element_size - cdef int incx = x.strides[0] / element_size - cdef cl_mem ydata = y.base_data.int_ptr - cdef size_t offy = y.offset / element_size - cdef int incy = y.strides[0] / element_size + cdef cl_mem x_buffer = x.base_data.int_ptr + cdef cl_mem y_buffer = y.base_data.int_ptr - cdef cl_command_queue commandQueue = queue.int_ptr + cdef cl_command_queue command_queue = queue.int_ptr cdef cl_event event = NULL - cdef CLBlastStatusCode + cdef CLBlastStatusCode err if dtype == np.dtype("float32"): - err = CLBlastSswap(N, xdata, offx, incx, ydata, offy, incy, &commandQueue, &event) + err = CLBlastSswap(n, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event) elif dtype == np.dtype("float64"): - err = CLBlastDswap(N, xdata, offx, incx, ydata, offy, incy, &commandQueue, &event) + err = CLBlastDswap(n, x_buffer, x_offset, x_inc, y_buffer, 
y_offset, y_inc, &command_queue, &event) elif dtype == np.dtype("complex64"): - err = CLBlastCswap(N, xdata, offx, incx, ydata, offy, incy, &commandQueue, &event) + err = CLBlastCswap(n, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event) elif dtype == np.dtype("complex128"): - err = CLBlastZswap(N, xdata, offx, incx, ydata, offy, incy, &commandQueue, &event) + err = CLBlastZswap(n, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event) else: - raise ValueError("PyCLBlast: Unrecognized dtype '%s'" % dtype) + raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) if err != CLBlastSuccess: raise RuntimeError("PyCLBlast: 'CLBlastXswap' failed: %s" % get_status_message(err)) return cl.Event.from_int_ptr(event) -- cgit v1.2.3 From e1bfb4082716ef9619a13e9985aca9ef28cf4cbf Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sun, 18 Feb 2018 16:33:20 +0100 Subject: Added GEMM to the Python wrapper --- scripts/generator/generator/convert.py | 13 ----------- scripts/generator/generator/pyclblast.py | 37 ++++++++++++++++++++++++++--- scripts/generator/generator/routine.py | 17 ++++++++++++-- src/pyclblast/pyclblast/pyclblast.pyx | 40 ++++++++++++++++++++++++++++++++ 4 files changed, 89 insertions(+), 18 deletions(-) diff --git a/scripts/generator/generator/convert.py b/scripts/generator/generator/convert.py index 44eb69d6..07f45669 100644 --- a/scripts/generator/generator/convert.py +++ b/scripts/generator/generator/convert.py @@ -80,16 +80,3 @@ def option_to_documentation(x): 'triangle': "The part of the array of the triangular matrix to be used, either `Triangle::kUpper` (121) or `Triangle::kLower` (122).", 'diagonal': "The property of the diagonal matrix, either `Diagonal::kNonUnit` (131) for non-unit values on the diagonal or `Diagonal::kUnit` (132) for unit values on the diagonal.", }[x] - - -def option_to_clblastdefault(x): - """Translates an option name to a CLBlast C default type""" - return { - 'layout': 
"CLBlastLayoutColMajor", - 'a_transpose': "CLBlastTransposeNo", - 'b_transpose': "CLBlastTransposeNo", - 'ab_transpose': "CLBlastTransposeNo", - 'side': "CLBlastSideLeft", - 'triangle': "CLBlastTriangleUpper", - 'diagonal': "CLBlastDiagonalNonUnit", - }[x] diff --git a/scripts/generator/generator/pyclblast.py b/scripts/generator/generator/pyclblast.py index 089a410a..ffeaab8d 100644 --- a/scripts/generator/generator/pyclblast.py +++ b/scripts/generator/generator/pyclblast.py @@ -21,9 +21,21 @@ def to_np_dtype(flavour): raise RuntimeError("Could not convert flavour '%s' to numpy" % flavour.precision_name) +def scalar_cython_conversion(scalar, flavour): + if flavour.precision_name == "S": + return "" + scalar + if flavour.precision_name == "D": + return "" + scalar + if flavour.precision_name == "C": + return "cl_float2(x=" + scalar + ".real,y=" + scalar + ".imag)" + if flavour.precision_name == "Z": + return "cl_double2(x=" + scalar + ".real,y=" + scalar + ".imag)" + raise RuntimeError("Could not convert flavour '%s'" % flavour.precision_name) + + def generate_pyx(routine): result = "" - if routine.implemented and routine.plain_name() == "swap": # TODO: Generalize + if routine.implemented and routine.plain_name() in ["swap", "gemm"]: # TODO: Generalize result += SEPARATOR + NL result += "# " + routine.description + ": " + routine.short_names() + NL @@ -59,16 +71,35 @@ def generate_pyx(routine): result += " cdef cl_command_queue command_queue = queue.int_ptr" + NL result += " cdef cl_event event = NULL" + NL - result += "" + NL + for option in routine.options: + if option == "a_transpose": + result += " a_transpose = CLBlastTransposeYes if a_transp else CLBlastTransposeNo" + NL + if option == "b_transpose": + result += " b_transpose = CLBlastTransposeYes if b_transp else CLBlastTransposeNo" + NL + if option == "ab_transpose": + result += " ab_transpose = CLBlastTransposeYes if ab_transp else CLBlastTransposeNo" + NL + if option == "side": + result += " side = 
CLBlastSideRight if right_side else CLBlastSideLeft" + NL + if option == "triangle": + result += " triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper" + NL + if option == "diagonal": + result += " diagonal = CLBlastDiagonalUnit if unit_diagonal else CLBlastDiagonalNonUnit" + NL + + result += "" + NL result += " cdef CLBlastStatusCode err" + NL if_prefix = "" for flavour in routine.flavours: if flavour.precision_name in ["S", "D", "C", "Z"]: np_dtype = to_np_dtype(flavour) + argument_names = [x. + replace("layout", "CLBlastLayoutRowMajor"). + replace("alpha", scalar_cython_conversion("alpha", flavour)). + replace("beta", scalar_cython_conversion("beta", flavour)) + for x in routine.arguments()] result += " " + if_prefix + "if dtype == np.dtype(\"" + np_dtype + "\"):" + NL result += " err = CLBlast" + flavour.name + routine.plain_name() - result += "(" + ", ".join(routine.arguments()) + ", &command_queue, &event)" + NL + result += "(" + ", ".join(argument_names) + ", &command_queue, &event)" + NL if_prefix = "el" result += " else:" + NL diff --git a/scripts/generator/generator/routine.py b/scripts/generator/generator/routine.py index d0b0a6d7..c52f49ca 100644 --- a/scripts/generator/generator/routine.py +++ b/scripts/generator/generator/routine.py @@ -827,9 +827,22 @@ class Routine: for buf in buffers: if buf in self.buffers_vector(): result.append(buf + "_inc = 1") + for scalar in self.scalars: + default = "1.0" if scalar == "alpha" else "0.0" + result.append(scalar + " = " + default) for option in self.options: - default = convert.option_to_clblastdefault(option) - result.append(option + " = " + default) + if option == "a_transpose": + result.append("a_transp = False") + if option == "b_transpose": + result.append("b_transp = False") + if option == "ab_transpose": + result.append("ab_transp = False") + if option == "side": + result.append("right_side = False") + if option == "triangle": + result.append("lower_triangle = False") + if option 
== "diagonal": + result.append("unit_diagonal = False") for buf in buffers: result.append(buf + "_offset = 0") return result diff --git a/src/pyclblast/pyclblast/pyclblast.pyx b/src/pyclblast/pyclblast/pyclblast.pyx index a090d367..2f6ebba2 100644 --- a/src/pyclblast/pyclblast/pyclblast.pyx +++ b/src/pyclblast/pyclblast/pyclblast.pyx @@ -323,3 +323,43 @@ def swap(queue, n, x, y, x_inc = 1, y_inc = 1, x_offset = 0, y_offset = 0): return cl.Event.from_int_ptr(event) #################################################################################################### +# General matrix-matrix multiplication: SGEMM/DGEMM/CGEMM/ZGEMM/HGEMM +#################################################################################################### + +cdef extern from "clblast_c.h": + CLBlastStatusCode CLBlastSgemm(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, const size_t m, const size_t n, const size_t k, const float alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const float beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastDgemm(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, const size_t m, const size_t n, const size_t k, const double alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const double beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastCgemm(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, const size_t m, const size_t n, const size_t k, const cl_float2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const 
size_t b_ld, const cl_float2 beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastZgemm(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, const size_t m, const size_t n, const size_t k, const cl_double2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const cl_double2 beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event) + +def gemm(queue, m, n, k, a, b, c, a_ld, b_ld, c_ld, alpha = 1.0, beta = 0.0, a_transp = False, b_transp = False, a_offset = 0, b_offset = 0, c_offset = 0): + dtype = check_dtype([a, b, c], ["float32", "float64", "complex64", "complex128"]) + check_matrix(a, "a") + check_matrix(b, "b") + check_matrix(c, "c") + + cdef cl_mem a_buffer = a.base_data.int_ptr + cdef cl_mem b_buffer = b.base_data.int_ptr + cdef cl_mem c_buffer = c.base_data.int_ptr + + cdef cl_command_queue command_queue = queue.int_ptr + cdef cl_event event = NULL + a_transpose = CLBlastTransposeYes if a_transp else CLBlastTransposeNo + b_transpose = CLBlastTransposeYes if b_transp else CLBlastTransposeNo + + cdef CLBlastStatusCode err + if dtype == np.dtype("float32"): + err = CLBlastSgemm(CLBlastLayoutRowMajor, a_transpose, b_transpose, m, n, k, alpha, a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, beta, c_buffer, c_offset, c_ld, &command_queue, &event) + elif dtype == np.dtype("float64"): + err = CLBlastDgemm(CLBlastLayoutRowMajor, a_transpose, b_transpose, m, n, k, alpha, a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, beta, c_buffer, c_offset, c_ld, &command_queue, &event) + elif dtype == np.dtype("complex64"): + err = CLBlastCgemm(CLBlastLayoutRowMajor, a_transpose, b_transpose, m, n, k, cl_float2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, 
cl_float2(x=beta.real,y=beta.imag), c_buffer, c_offset, c_ld, &command_queue, &event) + elif dtype == np.dtype("complex128"): + err = CLBlastZgemm(CLBlastLayoutRowMajor, a_transpose, b_transpose, m, n, k, cl_double2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, cl_double2(x=beta.real,y=beta.imag), c_buffer, c_offset, c_ld, &command_queue, &event) + else: + raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) + if err != CLBlastSuccess: + raise RuntimeError("PyCLBlast: 'CLBlastXgemm' failed: %s" % get_status_message(err)) + return cl.Event.from_int_ptr(event) + +#################################################################################################### -- cgit v1.2.3 From a66e24a009fe6f23c6231ec3b7c1a4698f831435 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sun, 18 Feb 2018 17:34:10 +0100 Subject: Added all other level 1/2/3 routines to pyclblast --- scripts/generator/generator/pyclblast.py | 13 +- src/pyclblast/pyclblast/pyclblast.pyx | 1536 ++++++++++++++++++++++++++++++ 2 files changed, 1543 insertions(+), 6 deletions(-) diff --git a/scripts/generator/generator/pyclblast.py b/scripts/generator/generator/pyclblast.py index ffeaab8d..85bcc93f 100644 --- a/scripts/generator/generator/pyclblast.py +++ b/scripts/generator/generator/pyclblast.py @@ -22,20 +22,21 @@ def to_np_dtype(flavour): def scalar_cython_conversion(scalar, flavour): - if flavour.precision_name == "S": + scalar_type = flavour.alpha_cl if scalar == "alpha" else flavour.beta_cl + if scalar_type == "float": return "" + scalar - if flavour.precision_name == "D": + if scalar_type == "double": return "" + scalar - if flavour.precision_name == "C": + if scalar_type in ["cl_float2", "float2"]: return "cl_float2(x=" + scalar + ".real,y=" + scalar + ".imag)" - if flavour.precision_name == "Z": + if scalar_type in ["cl_double2", "double2"]: return "cl_double2(x=" + scalar + ".real,y=" + scalar + ".imag)" - raise RuntimeError("Could not convert 
flavour '%s'" % flavour.precision_name) + raise RuntimeError("Could not convert flavour '%s:%s'" % (flavour.precision_name, scalar_type)) def generate_pyx(routine): result = "" - if routine.implemented and routine.plain_name() in ["swap", "gemm"]: # TODO: Generalize + if routine.implemented and routine.plain_name() and routine.level in ["1", "2a", "2b", "3"]: result += SEPARATOR + NL result += "# " + routine.description + ": " + routine.short_names() + NL diff --git a/src/pyclblast/pyclblast/pyclblast.pyx b/src/pyclblast/pyclblast/pyclblast.pyx index 2f6ebba2..9529400c 100644 --- a/src/pyclblast/pyclblast/pyclblast.pyx +++ b/src/pyclblast/pyclblast/pyclblast.pyx @@ -322,6 +322,1244 @@ def swap(queue, n, x, y, x_inc = 1, y_inc = 1, x_offset = 0, y_offset = 0): raise RuntimeError("PyCLBlast: 'CLBlastXswap' failed: %s" % get_status_message(err)) return cl.Event.from_int_ptr(event) +#################################################################################################### +# Vector scaling: SSCAL/DSCAL/CSCAL/ZSCAL/HSCAL +#################################################################################################### + +cdef extern from "clblast_c.h": + CLBlastStatusCode CLBlastSscal(const size_t n, const float alpha, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastDscal(const size_t n, const double alpha, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastCscal(const size_t n, const cl_float2 alpha, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastZscal(const size_t n, const cl_double2 alpha, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) + +def scal(queue, n, x, x_inc = 1, alpha = 1.0, x_offset = 0): + dtype = check_dtype([x], ["float32", "float64", "complex64", 
"complex128"]) + check_vector(x, "x") + + cdef cl_mem x_buffer = x.base_data.int_ptr + + cdef cl_command_queue command_queue = queue.int_ptr + cdef cl_event event = NULL + + cdef CLBlastStatusCode err + if dtype == np.dtype("float32"): + err = CLBlastSscal(n, alpha, x_buffer, x_offset, x_inc, &command_queue, &event) + elif dtype == np.dtype("float64"): + err = CLBlastDscal(n, alpha, x_buffer, x_offset, x_inc, &command_queue, &event) + elif dtype == np.dtype("complex64"): + err = CLBlastCscal(n, cl_float2(x=alpha.real,y=alpha.imag), x_buffer, x_offset, x_inc, &command_queue, &event) + elif dtype == np.dtype("complex128"): + err = CLBlastZscal(n, cl_double2(x=alpha.real,y=alpha.imag), x_buffer, x_offset, x_inc, &command_queue, &event) + else: + raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) + if err != CLBlastSuccess: + raise RuntimeError("PyCLBlast: 'CLBlastXscal' failed: %s" % get_status_message(err)) + return cl.Event.from_int_ptr(event) + +#################################################################################################### +# Vector copy: SCOPY/DCOPY/CCOPY/ZCOPY/HCOPY +#################################################################################################### + +cdef extern from "clblast_c.h": + CLBlastStatusCode CLBlastScopy(const size_t n, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastDcopy(const size_t n, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastCcopy(const size_t n, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastZcopy(const size_t n, const cl_mem x_buffer, const size_t 
x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) + +def copy(queue, n, x, y, x_inc = 1, y_inc = 1, x_offset = 0, y_offset = 0): + dtype = check_dtype([x, y], ["float32", "float64", "complex64", "complex128"]) + check_vector(x, "x") + check_vector(y, "y") + + cdef cl_mem x_buffer = x.base_data.int_ptr + cdef cl_mem y_buffer = y.base_data.int_ptr + + cdef cl_command_queue command_queue = queue.int_ptr + cdef cl_event event = NULL + + cdef CLBlastStatusCode err + if dtype == np.dtype("float32"): + err = CLBlastScopy(n, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event) + elif dtype == np.dtype("float64"): + err = CLBlastDcopy(n, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event) + elif dtype == np.dtype("complex64"): + err = CLBlastCcopy(n, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event) + elif dtype == np.dtype("complex128"): + err = CLBlastZcopy(n, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event) + else: + raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) + if err != CLBlastSuccess: + raise RuntimeError("PyCLBlast: 'CLBlastXcopy' failed: %s" % get_status_message(err)) + return cl.Event.from_int_ptr(event) + +#################################################################################################### +# Vector-times-constant plus vector: SAXPY/DAXPY/CAXPY/ZAXPY/HAXPY +#################################################################################################### + +cdef extern from "clblast_c.h": + CLBlastStatusCode CLBlastSaxpy(const size_t n, const float alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastDaxpy(const size_t n, const double alpha, const cl_mem x_buffer, const size_t x_offset, const 
size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastCaxpy(const size_t n, const cl_float2 alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastZaxpy(const size_t n, const cl_double2 alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) + +def axpy(queue, n, x, y, x_inc = 1, y_inc = 1, alpha = 1.0, x_offset = 0, y_offset = 0): + dtype = check_dtype([x, y], ["float32", "float64", "complex64", "complex128"]) + check_vector(x, "x") + check_vector(y, "y") + + cdef cl_mem x_buffer = x.base_data.int_ptr + cdef cl_mem y_buffer = y.base_data.int_ptr + + cdef cl_command_queue command_queue = queue.int_ptr + cdef cl_event event = NULL + + cdef CLBlastStatusCode err + if dtype == np.dtype("float32"): + err = CLBlastSaxpy(n, alpha, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event) + elif dtype == np.dtype("float64"): + err = CLBlastDaxpy(n, alpha, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event) + elif dtype == np.dtype("complex64"): + err = CLBlastCaxpy(n, cl_float2(x=alpha.real,y=alpha.imag), x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event) + elif dtype == np.dtype("complex128"): + err = CLBlastZaxpy(n, cl_double2(x=alpha.real,y=alpha.imag), x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event) + else: + raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) + if err != CLBlastSuccess: + raise RuntimeError("PyCLBlast: 'CLBlastXaxpy' failed: %s" % get_status_message(err)) + return cl.Event.from_int_ptr(event) + 
+#################################################################################################### +# Dot product of two vectors: SDOT/DDOT/HDOT +#################################################################################################### + +cdef extern from "clblast_c.h": + CLBlastStatusCode CLBlastSdot(const size_t n, cl_mem dot_buffer, const size_t dot_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastDdot(const size_t n, cl_mem dot_buffer, const size_t dot_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) + +def dot(queue, n, x, y, dot, x_inc = 1, y_inc = 1, x_offset = 0, y_offset = 0, dot_offset = 0): + dtype = check_dtype([x, y, dot], ["float32", "float64"]) + check_vector(x, "x") + check_vector(y, "y") + check_matrix(dot, "dot") + + cdef cl_mem x_buffer = x.base_data.int_ptr + cdef cl_mem y_buffer = y.base_data.int_ptr + cdef cl_mem dot_buffer = dot.base_data.int_ptr + + cdef cl_command_queue command_queue = queue.int_ptr + cdef cl_event event = NULL + + cdef CLBlastStatusCode err + if dtype == np.dtype("float32"): + err = CLBlastSdot(n, dot_buffer, dot_offset, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event) + elif dtype == np.dtype("float64"): + err = CLBlastDdot(n, dot_buffer, dot_offset, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event) + else: + raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) + if err != CLBlastSuccess: + raise RuntimeError("PyCLBlast: 'CLBlastXdot' failed: %s" % get_status_message(err)) + return cl.Event.from_int_ptr(event) + +#################################################################################################### +# Dot product of two complex vectors: 
CDOTU/ZDOTU +#################################################################################################### + +cdef extern from "clblast_c.h": + CLBlastStatusCode CLBlastCdotu(const size_t n, cl_mem dot_buffer, const size_t dot_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastZdotu(const size_t n, cl_mem dot_buffer, const size_t dot_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) + +def dotu(queue, n, x, y, dot, x_inc = 1, y_inc = 1, x_offset = 0, y_offset = 0, dot_offset = 0): + dtype = check_dtype([x, y, dot], ["complex64", "complex128"]) + check_vector(x, "x") + check_vector(y, "y") + check_matrix(dot, "dot") + + cdef cl_mem x_buffer = x.base_data.int_ptr + cdef cl_mem y_buffer = y.base_data.int_ptr + cdef cl_mem dot_buffer = dot.base_data.int_ptr + + cdef cl_command_queue command_queue = queue.int_ptr + cdef cl_event event = NULL + + cdef CLBlastStatusCode err + if dtype == np.dtype("complex64"): + err = CLBlastCdotu(n, dot_buffer, dot_offset, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event) + elif dtype == np.dtype("complex128"): + err = CLBlastZdotu(n, dot_buffer, dot_offset, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event) + else: + raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) + if err != CLBlastSuccess: + raise RuntimeError("PyCLBlast: 'CLBlastXdotu' failed: %s" % get_status_message(err)) + return cl.Event.from_int_ptr(event) + +#################################################################################################### +# Dot product of two complex vectors, one conjugated: CDOTC/ZDOTC 
+#################################################################################################### + +cdef extern from "clblast_c.h": + CLBlastStatusCode CLBlastCdotc(const size_t n, cl_mem dot_buffer, const size_t dot_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastZdotc(const size_t n, cl_mem dot_buffer, const size_t dot_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) + +def dotc(queue, n, x, y, dot, x_inc = 1, y_inc = 1, x_offset = 0, y_offset = 0, dot_offset = 0): + dtype = check_dtype([x, y, dot], ["complex64", "complex128"]) + check_vector(x, "x") + check_vector(y, "y") + check_matrix(dot, "dot") + + cdef cl_mem x_buffer = x.base_data.int_ptr + cdef cl_mem y_buffer = y.base_data.int_ptr + cdef cl_mem dot_buffer = dot.base_data.int_ptr + + cdef cl_command_queue command_queue = queue.int_ptr + cdef cl_event event = NULL + + cdef CLBlastStatusCode err + if dtype == np.dtype("complex64"): + err = CLBlastCdotc(n, dot_buffer, dot_offset, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event) + elif dtype == np.dtype("complex128"): + err = CLBlastZdotc(n, dot_buffer, dot_offset, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event) + else: + raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) + if err != CLBlastSuccess: + raise RuntimeError("PyCLBlast: 'CLBlastXdotc' failed: %s" % get_status_message(err)) + return cl.Event.from_int_ptr(event) + +#################################################################################################### +# Euclidian norm of a vector: SNRM2/DNRM2/ScNRM2/DzNRM2/HNRM2 +#################################################################################################### + +cdef 
extern from "clblast_c.h": + CLBlastStatusCode CLBlastSnrm2(const size_t n, cl_mem nrm2_buffer, const size_t nrm2_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastDnrm2(const size_t n, cl_mem nrm2_buffer, const size_t nrm2_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastScnrm2(const size_t n, cl_mem nrm2_buffer, const size_t nrm2_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastDznrm2(const size_t n, cl_mem nrm2_buffer, const size_t nrm2_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) + +def nrm2(queue, n, x, nrm2, x_inc = 1, x_offset = 0, nrm2_offset = 0): + dtype = check_dtype([x, nrm2], ["float32", "float64", "complex64", "complex128"]) + check_vector(x, "x") + check_matrix(nrm2, "nrm2") + + cdef cl_mem x_buffer = x.base_data.int_ptr + cdef cl_mem nrm2_buffer = nrm2.base_data.int_ptr + + cdef cl_command_queue command_queue = queue.int_ptr + cdef cl_event event = NULL + + cdef CLBlastStatusCode err + if dtype == np.dtype("float32"): + err = CLBlastSnrm2(n, nrm2_buffer, nrm2_offset, x_buffer, x_offset, x_inc, &command_queue, &event) + elif dtype == np.dtype("float64"): + err = CLBlastDnrm2(n, nrm2_buffer, nrm2_offset, x_buffer, x_offset, x_inc, &command_queue, &event) + elif dtype == np.dtype("complex64"): + err = CLBlastScnrm2(n, nrm2_buffer, nrm2_offset, x_buffer, x_offset, x_inc, &command_queue, &event) + elif dtype == np.dtype("complex128"): + err = CLBlastDznrm2(n, nrm2_buffer, nrm2_offset, x_buffer, x_offset, x_inc, &command_queue, &event) + else: + raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) + if err != CLBlastSuccess: + raise RuntimeError("PyCLBlast: 'CLBlastXnrm2' failed: %s" % 
get_status_message(err)) + return cl.Event.from_int_ptr(event) + +#################################################################################################### +# Absolute sum of values in a vector: SASUM/DASUM/ScASUM/DzASUM/HASUM +#################################################################################################### + +cdef extern from "clblast_c.h": + CLBlastStatusCode CLBlastSasum(const size_t n, cl_mem asum_buffer, const size_t asum_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastDasum(const size_t n, cl_mem asum_buffer, const size_t asum_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastScasum(const size_t n, cl_mem asum_buffer, const size_t asum_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastDzasum(const size_t n, cl_mem asum_buffer, const size_t asum_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) + +def asum(queue, n, x, asum, x_inc = 1, x_offset = 0, asum_offset = 0): + dtype = check_dtype([x, asum], ["float32", "float64", "complex64", "complex128"]) + check_vector(x, "x") + check_matrix(asum, "asum") + + cdef cl_mem x_buffer = x.base_data.int_ptr + cdef cl_mem asum_buffer = asum.base_data.int_ptr + + cdef cl_command_queue command_queue = queue.int_ptr + cdef cl_event event = NULL + + cdef CLBlastStatusCode err + if dtype == np.dtype("float32"): + err = CLBlastSasum(n, asum_buffer, asum_offset, x_buffer, x_offset, x_inc, &command_queue, &event) + elif dtype == np.dtype("float64"): + err = CLBlastDasum(n, asum_buffer, asum_offset, x_buffer, x_offset, x_inc, &command_queue, &event) + elif dtype == np.dtype("complex64"): + err = CLBlastScasum(n, asum_buffer, asum_offset, x_buffer, x_offset, x_inc, 
&command_queue, &event) + elif dtype == np.dtype("complex128"): + err = CLBlastDzasum(n, asum_buffer, asum_offset, x_buffer, x_offset, x_inc, &command_queue, &event) + else: + raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) + if err != CLBlastSuccess: + raise RuntimeError("PyCLBlast: 'CLBlastXasum' failed: %s" % get_status_message(err)) + return cl.Event.from_int_ptr(event) + +#################################################################################################### +# Sum of values in a vector (non-BLAS function): SSUM/DSUM/ScSUM/DzSUM/HSUM +#################################################################################################### + +cdef extern from "clblast_c.h": + CLBlastStatusCode CLBlastSsum(const size_t n, cl_mem sum_buffer, const size_t sum_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastDsum(const size_t n, cl_mem sum_buffer, const size_t sum_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastScsum(const size_t n, cl_mem sum_buffer, const size_t sum_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastDzsum(const size_t n, cl_mem sum_buffer, const size_t sum_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) + +def sum(queue, n, x, sum, x_inc = 1, x_offset = 0, sum_offset = 0): + dtype = check_dtype([x, sum], ["float32", "float64", "complex64", "complex128"]) + check_vector(x, "x") + check_matrix(sum, "sum") + + cdef cl_mem x_buffer = x.base_data.int_ptr + cdef cl_mem sum_buffer = sum.base_data.int_ptr + + cdef cl_command_queue command_queue = queue.int_ptr + cdef cl_event event = NULL + + cdef CLBlastStatusCode err + if dtype == np.dtype("float32"): + err = CLBlastSsum(n, sum_buffer, 
sum_offset, x_buffer, x_offset, x_inc, &command_queue, &event) + elif dtype == np.dtype("float64"): + err = CLBlastDsum(n, sum_buffer, sum_offset, x_buffer, x_offset, x_inc, &command_queue, &event) + elif dtype == np.dtype("complex64"): + err = CLBlastScsum(n, sum_buffer, sum_offset, x_buffer, x_offset, x_inc, &command_queue, &event) + elif dtype == np.dtype("complex128"): + err = CLBlastDzsum(n, sum_buffer, sum_offset, x_buffer, x_offset, x_inc, &command_queue, &event) + else: + raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) + if err != CLBlastSuccess: + raise RuntimeError("PyCLBlast: 'CLBlastXsum' failed: %s" % get_status_message(err)) + return cl.Event.from_int_ptr(event) + +#################################################################################################### +# Index of absolute maximum value in a vector: iSAMAX/iDAMAX/iCAMAX/iZAMAX/iHAMAX +#################################################################################################### + +cdef extern from "clblast_c.h": + CLBlastStatusCode CLBlastiSamax(const size_t n, cl_mem imax_buffer, const size_t imax_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastiDamax(const size_t n, cl_mem imax_buffer, const size_t imax_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastiCamax(const size_t n, cl_mem imax_buffer, const size_t imax_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastiZamax(const size_t n, cl_mem imax_buffer, const size_t imax_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) + +def amax(queue, n, x, imax, x_inc = 1, x_offset = 0, imax_offset = 0): + dtype = check_dtype([x, imax], ["float32", "float64", "complex64", "complex128"]) + 
check_vector(x, "x") + check_matrix(imax, "imax") + + cdef cl_mem x_buffer = x.base_data.int_ptr + cdef cl_mem imax_buffer = imax.base_data.int_ptr + + cdef cl_command_queue command_queue = queue.int_ptr + cdef cl_event event = NULL + + cdef CLBlastStatusCode err + if dtype == np.dtype("float32"): + err = CLBlastiSamax(n, imax_buffer, imax_offset, x_buffer, x_offset, x_inc, &command_queue, &event) + elif dtype == np.dtype("float64"): + err = CLBlastiDamax(n, imax_buffer, imax_offset, x_buffer, x_offset, x_inc, &command_queue, &event) + elif dtype == np.dtype("complex64"): + err = CLBlastiCamax(n, imax_buffer, imax_offset, x_buffer, x_offset, x_inc, &command_queue, &event) + elif dtype == np.dtype("complex128"): + err = CLBlastiZamax(n, imax_buffer, imax_offset, x_buffer, x_offset, x_inc, &command_queue, &event) + else: + raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) + if err != CLBlastSuccess: + raise RuntimeError("PyCLBlast: 'CLBlastXamax' failed: %s" % get_status_message(err)) + return cl.Event.from_int_ptr(event) + +#################################################################################################### +# Index of absolute minimum value in a vector (non-BLAS function): iSAMIN/iDAMIN/iCAMIN/iZAMIN/iHAMIN +#################################################################################################### + +cdef extern from "clblast_c.h": + CLBlastStatusCode CLBlastiSamin(const size_t n, cl_mem imin_buffer, const size_t imin_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastiDamin(const size_t n, cl_mem imin_buffer, const size_t imin_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastiCamin(const size_t n, cl_mem imin_buffer, const size_t imin_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* 
event) + CLBlastStatusCode CLBlastiZamin(const size_t n, cl_mem imin_buffer, const size_t imin_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) + +def amin(queue, n, x, imin, x_inc = 1, x_offset = 0, imin_offset = 0): + dtype = check_dtype([x, imin], ["float32", "float64", "complex64", "complex128"]) + check_vector(x, "x") + check_matrix(imin, "imin") + + cdef cl_mem x_buffer = x.base_data.int_ptr + cdef cl_mem imin_buffer = imin.base_data.int_ptr + + cdef cl_command_queue command_queue = queue.int_ptr + cdef cl_event event = NULL + + cdef CLBlastStatusCode err + if dtype == np.dtype("float32"): + err = CLBlastiSamin(n, imin_buffer, imin_offset, x_buffer, x_offset, x_inc, &command_queue, &event) + elif dtype == np.dtype("float64"): + err = CLBlastiDamin(n, imin_buffer, imin_offset, x_buffer, x_offset, x_inc, &command_queue, &event) + elif dtype == np.dtype("complex64"): + err = CLBlastiCamin(n, imin_buffer, imin_offset, x_buffer, x_offset, x_inc, &command_queue, &event) + elif dtype == np.dtype("complex128"): + err = CLBlastiZamin(n, imin_buffer, imin_offset, x_buffer, x_offset, x_inc, &command_queue, &event) + else: + raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) + if err != CLBlastSuccess: + raise RuntimeError("PyCLBlast: 'CLBlastXamin' failed: %s" % get_status_message(err)) + return cl.Event.from_int_ptr(event) + +#################################################################################################### +# Index of maximum value in a vector (non-BLAS function): iSMAX/iDMAX/iCMAX/iZMAX/iHMAX +#################################################################################################### + +cdef extern from "clblast_c.h": + CLBlastStatusCode CLBlastiSmax(const size_t n, cl_mem imax_buffer, const size_t imax_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastiDmax(const 
size_t n, cl_mem imax_buffer, const size_t imax_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastiCmax(const size_t n, cl_mem imax_buffer, const size_t imax_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastiZmax(const size_t n, cl_mem imax_buffer, const size_t imax_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) + +def max(queue, n, x, imax, x_inc = 1, x_offset = 0, imax_offset = 0): + dtype = check_dtype([x, imax], ["float32", "float64", "complex64", "complex128"]) + check_vector(x, "x") + check_matrix(imax, "imax") + + cdef cl_mem x_buffer = x.base_data.int_ptr + cdef cl_mem imax_buffer = imax.base_data.int_ptr + + cdef cl_command_queue command_queue = queue.int_ptr + cdef cl_event event = NULL + + cdef CLBlastStatusCode err + if dtype == np.dtype("float32"): + err = CLBlastiSmax(n, imax_buffer, imax_offset, x_buffer, x_offset, x_inc, &command_queue, &event) + elif dtype == np.dtype("float64"): + err = CLBlastiDmax(n, imax_buffer, imax_offset, x_buffer, x_offset, x_inc, &command_queue, &event) + elif dtype == np.dtype("complex64"): + err = CLBlastiCmax(n, imax_buffer, imax_offset, x_buffer, x_offset, x_inc, &command_queue, &event) + elif dtype == np.dtype("complex128"): + err = CLBlastiZmax(n, imax_buffer, imax_offset, x_buffer, x_offset, x_inc, &command_queue, &event) + else: + raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) + if err != CLBlastSuccess: + raise RuntimeError("PyCLBlast: 'CLBlastXmax' failed: %s" % get_status_message(err)) + return cl.Event.from_int_ptr(event) + +#################################################################################################### +# Index of minimum value in a vector (non-BLAS function): iSMIN/iDMIN/iCMIN/iZMIN/iHMIN 
####################################################################################################

cdef extern from "clblast_c.h":
    CLBlastStatusCode CLBlastiSmin(
        const size_t n, cl_mem imin_buffer, const size_t imin_offset,
        const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
        cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastiDmin(
        const size_t n, cl_mem imin_buffer, const size_t imin_offset,
        const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
        cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastiCmin(
        const size_t n, cl_mem imin_buffer, const size_t imin_offset,
        const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
        cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastiZmin(
        const size_t n, cl_mem imin_buffer, const size_t imin_offset,
        const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
        cl_command_queue* queue, cl_event* event)

def min(queue, n, x, imin, x_inc = 1, x_offset = 0, imin_offset = 0):
    """
    xMIN: Index of minimum value in a vector (non-BLAS function).

    Calls CLBlastiSmin/iDmin/iCmin/iZmin, selected by the data-type that check_dtype returns for
    'x' and 'imin'.

    queue:        command queue exposing 'int_ptr' (e.g. a pyopencl CommandQueue)
    n:            vector length, forwarded to CLBlast
    x:            input array exposing 'base_data.int_ptr'
    imin:         output array exposing 'base_data.int_ptr'; receives the result
    x_inc:        increment of 'x', forwarded to CLBlast
    x_offset, imin_offset: buffer offsets, forwarded to CLBlast

    Returns the pyopencl Event created from the event handle filled in by CLBlast. Raises
    ValueError for an unrecognized data-type and RuntimeError if CLBlast does not report success.
    """
    # NOTE(review): 'imin' is included in the data-type check together with 'x' - confirm that the
    # index buffer is really meant to share the data-type of the input vector.
    dtype = check_dtype([x, imin], ["float32", "float64", "complex64", "complex128"])
    check_vector(x, "x")
    # 'imin' receives an index, so it is validated as a vector rather than as a matrix
    check_vector(imin, "imin")

    # 'int_ptr' is a Python integer: cast explicitly (via size_t) to obtain the OpenCL handles
    cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr
    cdef cl_mem imin_buffer = <cl_mem><size_t>imin.base_data.int_ptr

    cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
    cdef cl_event event = NULL

    cdef CLBlastStatusCode err
    if dtype == np.dtype("float32"):
        err = CLBlastiSmin(n, imin_buffer, imin_offset, x_buffer, x_offset, x_inc,
                           &command_queue, &event)
    elif dtype == np.dtype("float64"):
        err = CLBlastiDmin(n, imin_buffer, imin_offset, x_buffer, x_offset, x_inc,
                           &command_queue, &event)
    elif dtype == np.dtype("complex64"):
        err = CLBlastiCmin(n, imin_buffer, imin_offset, x_buffer, x_offset, x_inc,
                           &command_queue, &event)
    elif dtype == np.dtype("complex128"):
        err = CLBlastiZmin(n, imin_buffer, imin_offset, x_buffer, x_offset, x_inc,
                           &command_queue, &event)
    else:
        raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
    if err != CLBlastSuccess:
        raise RuntimeError("PyCLBlast: 'CLBlastXmin' failed: %s" % get_status_message(err))
    # The event handle is a C pointer: hand it to pyopencl as an integer
    return cl.Event.from_int_ptr(<size_t>event)

####################################################################################################
# General matrix-vector multiplication: SGEMV/DGEMV/CGEMV/ZGEMV/HGEMV
####################################################################################################

cdef extern from "clblast_c.h":
    CLBlastStatusCode CLBlastSgemv(
        const CLBlastLayout layout, const CLBlastTranspose a_transpose,
        const size_t m, const size_t n, const float alpha,
        const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
        const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const float beta,
        cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
        cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastDgemv(
        const CLBlastLayout layout, const CLBlastTranspose a_transpose,
        const size_t m, const size_t n, const double alpha,
        const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
        const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const double beta,
        cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
        cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastCgemv(
        const CLBlastLayout layout, const CLBlastTranspose a_transpose,
        const size_t m, const size_t n, const cl_float2 alpha,
        const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
        const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_float2 beta,
        cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
        cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastZgemv(
        const CLBlastLayout layout, const CLBlastTranspose a_transpose,
        const size_t m, const size_t n, const cl_double2 alpha,
        const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
        const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_double2 beta,
        cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
        cl_command_queue* queue, cl_event* event)

def gemv(queue, m, n, a, x, y, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, beta = 0.0, a_transp = False, a_offset = 0, x_offset = 0, y_offset = 0):
    """
    xGEMV: General matrix-vector multiplication.

    Calls CLBlastSgemv/Dgemv/Cgemv/Zgemv, selected by the data-type that check_dtype returns for
    'a', 'x' and 'y'. The layout is always passed as CLBlastLayoutRowMajor.

    queue:        command queue exposing 'int_ptr' (e.g. a pyopencl CommandQueue)
    m, n:         sizes, forwarded to CLBlast
    a, x, y:      arrays exposing 'base_data.int_ptr'; 'y' is the buffer CLBlast writes to
    a_ld:         leading dimension of 'a', forwarded to CLBlast
    x_inc, y_inc: vector increments, forwarded to CLBlast
    alpha, beta:  scalars; for the complex data-types their '.real' and '.imag' parts are used
    a_transp:     if true, CLBlastTransposeYes is passed, otherwise CLBlastTransposeNo
    a_offset, x_offset, y_offset: buffer offsets, forwarded to CLBlast

    Returns the pyopencl Event created from the event handle filled in by CLBlast. Raises
    ValueError for an unrecognized data-type and RuntimeError if CLBlast does not report success.
    """
    dtype = check_dtype([a, x, y], ["float32", "float64", "complex64", "complex128"])
    check_matrix(a, "a")
    check_vector(x, "x")
    check_vector(y, "y")

    # 'int_ptr' is a Python integer: cast explicitly (via size_t) to obtain the OpenCL handles
    cdef cl_mem a_buffer = <cl_mem><size_t>a.base_data.int_ptr
    cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr
    cdef cl_mem y_buffer = <cl_mem><size_t>y.base_data.int_ptr

    cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
    cdef cl_event event = NULL
    a_transpose = CLBlastTransposeYes if a_transp else CLBlastTransposeNo

    cdef CLBlastStatusCode err
    if dtype == np.dtype("float32"):
        err = CLBlastSgemv(CLBlastLayoutRowMajor, a_transpose, m, n, alpha,
                           a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, beta,
                           y_buffer, y_offset, y_inc, &command_queue, &event)
    elif dtype == np.dtype("float64"):
        err = CLBlastDgemv(CLBlastLayoutRowMajor, a_transpose, m, n, alpha,
                           a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, beta,
                           y_buffer, y_offset, y_inc, &command_queue, &event)
    elif dtype == np.dtype("complex64"):
        err = CLBlastCgemv(CLBlastLayoutRowMajor, a_transpose, m, n,
                           cl_float2(x=alpha.real, y=alpha.imag),
                           a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc,
                           cl_float2(x=beta.real, y=beta.imag),
                           y_buffer, y_offset, y_inc, &command_queue, &event)
    elif dtype == np.dtype("complex128"):
        err = CLBlastZgemv(CLBlastLayoutRowMajor, a_transpose, m, n,
                           cl_double2(x=alpha.real, y=alpha.imag),
                           a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc,
                           cl_double2(x=beta.real, y=beta.imag),
                           y_buffer, y_offset, y_inc, &command_queue, &event)
    else:
        raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
    if err != CLBlastSuccess:
        raise RuntimeError("PyCLBlast: 'CLBlastXgemv' failed: %s" % get_status_message(err))
    # The event handle is a C pointer: hand it to pyopencl as an integer
    return cl.Event.from_int_ptr(<size_t>event)

####################################################################################################
# General banded matrix-vector multiplication: SGBMV/DGBMV/CGBMV/ZGBMV/HGBMV
####################################################################################################

cdef extern from "clblast_c.h":
    CLBlastStatusCode CLBlastSgbmv(
        const CLBlastLayout layout, const CLBlastTranspose a_transpose,
        const size_t m, const size_t n, const size_t kl, const size_t ku, const float alpha,
        const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
        const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const float beta,
        cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
        cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastDgbmv(
        const CLBlastLayout layout, const CLBlastTranspose a_transpose,
        const size_t m, const size_t n, const size_t kl, const size_t ku, const double alpha,
        const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
        const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const double beta,
        cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
        cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastCgbmv(
        const CLBlastLayout layout, const CLBlastTranspose a_transpose,
        const size_t m, const size_t n, const size_t kl, const size_t ku, const cl_float2 alpha,
        const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
        const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_float2 beta,
        cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
        cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastZgbmv(
        const CLBlastLayout layout, const CLBlastTranspose a_transpose,
        const size_t m, const size_t n, const size_t kl, const size_t ku, const cl_double2 alpha,
        const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
        const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_double2 beta,
        cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
        cl_command_queue* queue, cl_event* event)

def gbmv(queue, m, n, kl, ku, a, x, y, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, beta = 0.0, a_transp = False, a_offset = 0, x_offset = 0, y_offset = 0):
    """
    xGBMV: General banded matrix-vector multiplication.

    Calls CLBlastSgbmv/Dgbmv/Cgbmv/Zgbmv, selected by the data-type that check_dtype returns for
    'a', 'x' and 'y'. The layout is always passed as CLBlastLayoutRowMajor.

    queue:        command queue exposing 'int_ptr' (e.g. a pyopencl CommandQueue)
    m, n, kl, ku: sizes, forwarded to CLBlast
    a, x, y:      arrays exposing 'base_data.int_ptr'; 'y' is the buffer CLBlast writes to
    a_ld:         leading dimension of 'a', forwarded to CLBlast
    x_inc, y_inc: vector increments, forwarded to CLBlast
    alpha, beta:  scalars; for the complex data-types their '.real' and '.imag' parts are used
    a_transp:     if true, CLBlastTransposeYes is passed, otherwise CLBlastTransposeNo
    a_offset, x_offset, y_offset: buffer offsets, forwarded to CLBlast

    Returns the pyopencl Event created from the event handle filled in by CLBlast. Raises
    ValueError for an unrecognized data-type and RuntimeError if CLBlast does not report success.
    """
    dtype = check_dtype([a, x, y], ["float32", "float64", "complex64", "complex128"])
    check_matrix(a, "a")
    check_vector(x, "x")
    check_vector(y, "y")

    # 'int_ptr' is a Python integer: cast explicitly (via size_t) to obtain the OpenCL handles
    cdef cl_mem a_buffer = <cl_mem><size_t>a.base_data.int_ptr
    cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr
    cdef cl_mem y_buffer = <cl_mem><size_t>y.base_data.int_ptr

    cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
    cdef cl_event event = NULL
    a_transpose = CLBlastTransposeYes if a_transp else CLBlastTransposeNo

    cdef CLBlastStatusCode err
    if dtype == np.dtype("float32"):
        err = CLBlastSgbmv(CLBlastLayoutRowMajor, a_transpose, m, n, kl, ku, alpha,
                           a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, beta,
                           y_buffer, y_offset, y_inc, &command_queue, &event)
    elif dtype == np.dtype("float64"):
        err = CLBlastDgbmv(CLBlastLayoutRowMajor, a_transpose, m, n, kl, ku, alpha,
                           a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, beta,
                           y_buffer, y_offset, y_inc, &command_queue, &event)
    elif dtype == np.dtype("complex64"):
        err = CLBlastCgbmv(CLBlastLayoutRowMajor, a_transpose, m, n, kl, ku,
                           cl_float2(x=alpha.real, y=alpha.imag),
                           a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc,
                           cl_float2(x=beta.real, y=beta.imag),
                           y_buffer, y_offset, y_inc, &command_queue, &event)
    elif dtype == np.dtype("complex128"):
        err = CLBlastZgbmv(CLBlastLayoutRowMajor, a_transpose, m, n, kl, ku,
                           cl_double2(x=alpha.real, y=alpha.imag),
                           a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc,
                           cl_double2(x=beta.real, y=beta.imag),
                           y_buffer, y_offset, y_inc, &command_queue, &event)
    else:
        raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
    if err != CLBlastSuccess:
        raise RuntimeError("PyCLBlast: 'CLBlastXgbmv' failed: %s" % get_status_message(err))
    # The event handle is a C pointer: hand it to pyopencl as an integer
    return cl.Event.from_int_ptr(<size_t>event)

####################################################################################################
# Hermitian matrix-vector multiplication: CHEMV/ZHEMV
####################################################################################################

cdef extern from "clblast_c.h":
    CLBlastStatusCode CLBlastChemv(
        const CLBlastLayout layout, const CLBlastTriangle triangle,
        const size_t n, const cl_float2 alpha,
        const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
        const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_float2 beta,
        cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
        cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastZhemv(
        const CLBlastLayout layout, const CLBlastTriangle triangle,
        const size_t n, const cl_double2 alpha,
        const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
        const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_double2 beta,
        cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
        cl_command_queue* queue, cl_event* event)

def hemv(queue, n, a, x, y, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, beta = 0.0, lower_triangle = False, a_offset = 0, x_offset = 0, y_offset = 0):
    """
    xHEMV: Hermitian matrix-vector multiplication.

    Calls CLBlastChemv/Zhemv, selected by the data-type that check_dtype returns for 'a', 'x' and
    'y'. The layout is always passed as CLBlastLayoutRowMajor.

    queue:          command queue exposing 'int_ptr' (e.g. a pyopencl CommandQueue)
    n:              size, forwarded to CLBlast
    a, x, y:        arrays exposing 'base_data.int_ptr'; 'y' is the buffer CLBlast writes to
    a_ld:           leading dimension of 'a', forwarded to CLBlast
    x_inc, y_inc:   vector increments, forwarded to CLBlast
    alpha, beta:    scalars; their '.real' and '.imag' parts are used
    lower_triangle: if true, CLBlastTriangleLower is passed, otherwise CLBlastTriangleUpper
    a_offset, x_offset, y_offset: buffer offsets, forwarded to CLBlast

    Returns the pyopencl Event created from the event handle filled in by CLBlast. Raises
    ValueError for an unrecognized data-type and RuntimeError if CLBlast does not report success.
    """
    dtype = check_dtype([a, x, y], ["complex64", "complex128"])
    check_matrix(a, "a")
    check_vector(x, "x")
    check_vector(y, "y")

    # 'int_ptr' is a Python integer: cast explicitly (via size_t) to obtain the OpenCL handles
    cdef cl_mem a_buffer = <cl_mem><size_t>a.base_data.int_ptr
    cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr
    cdef cl_mem y_buffer = <cl_mem><size_t>y.base_data.int_ptr

    cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
    cdef cl_event event = NULL
    triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper

    cdef CLBlastStatusCode err
    if dtype == np.dtype("complex64"):
        err = CLBlastChemv(CLBlastLayoutRowMajor, triangle, n,
                           cl_float2(x=alpha.real, y=alpha.imag),
                           a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc,
                           cl_float2(x=beta.real, y=beta.imag),
                           y_buffer, y_offset, y_inc, &command_queue, &event)
    elif dtype == np.dtype("complex128"):
        err = CLBlastZhemv(CLBlastLayoutRowMajor, triangle, n,
                           cl_double2(x=alpha.real, y=alpha.imag),
                           a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc,
                           cl_double2(x=beta.real, y=beta.imag),
                           y_buffer, y_offset, y_inc, &command_queue, &event)
    else:
        raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
    if err != CLBlastSuccess:
        raise RuntimeError("PyCLBlast: 'CLBlastXhemv' failed: %s" % get_status_message(err))
    # The event handle is a C pointer: hand it to pyopencl as an integer
    return cl.Event.from_int_ptr(<size_t>event)

####################################################################################################
# Hermitian banded matrix-vector multiplication: CHBMV/ZHBMV
####################################################################################################

cdef extern from "clblast_c.h":
    CLBlastStatusCode CLBlastChbmv(
        const CLBlastLayout layout, const CLBlastTriangle triangle,
        const size_t n, const size_t k, const cl_float2 alpha,
        const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
        const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_float2 beta,
        cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
        cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastZhbmv(
        const CLBlastLayout layout, const CLBlastTriangle triangle,
        const size_t n, const size_t k, const cl_double2 alpha,
        const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
        const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_double2 beta,
        cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
        cl_command_queue* queue, cl_event* event)

def hbmv(queue, n, k, a, x, y, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, beta = 0.0, lower_triangle = False, a_offset = 0, x_offset = 0, y_offset = 0):
    """
    xHBMV: Hermitian banded matrix-vector multiplication.

    Calls CLBlastChbmv/Zhbmv, selected by the data-type that check_dtype returns for 'a', 'x' and
    'y'. The layout is always passed as CLBlastLayoutRowMajor.

    queue:          command queue exposing 'int_ptr' (e.g. a pyopencl CommandQueue)
    n, k:           sizes, forwarded to CLBlast
    a, x, y:        arrays exposing 'base_data.int_ptr'; 'y' is the buffer CLBlast writes to
    a_ld:           leading dimension of 'a', forwarded to CLBlast
    x_inc, y_inc:   vector increments, forwarded to CLBlast
    alpha, beta:    scalars; their '.real' and '.imag' parts are used
    lower_triangle: if true, CLBlastTriangleLower is passed, otherwise CLBlastTriangleUpper
    a_offset, x_offset, y_offset: buffer offsets, forwarded to CLBlast

    Returns the pyopencl Event created from the event handle filled in by CLBlast. Raises
    ValueError for an unrecognized data-type and RuntimeError if CLBlast does not report success.
    """
    dtype = check_dtype([a, x, y], ["complex64", "complex128"])
    check_matrix(a, "a")
    check_vector(x, "x")
    check_vector(y, "y")

    # 'int_ptr' is a Python integer: cast explicitly (via size_t) to obtain the OpenCL handles
    cdef cl_mem a_buffer = <cl_mem><size_t>a.base_data.int_ptr
    cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr
    cdef cl_mem y_buffer = <cl_mem><size_t>y.base_data.int_ptr

    cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
    cdef cl_event event = NULL
    triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper

    cdef CLBlastStatusCode err
    if dtype == np.dtype("complex64"):
        err = CLBlastChbmv(CLBlastLayoutRowMajor, triangle, n, k,
                           cl_float2(x=alpha.real, y=alpha.imag),
                           a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc,
                           cl_float2(x=beta.real, y=beta.imag),
                           y_buffer, y_offset, y_inc, &command_queue, &event)
    elif dtype == np.dtype("complex128"):
        err = CLBlastZhbmv(CLBlastLayoutRowMajor, triangle, n, k,
                           cl_double2(x=alpha.real, y=alpha.imag),
                           a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc,
                           cl_double2(x=beta.real, y=beta.imag),
                           y_buffer, y_offset, y_inc, &command_queue, &event)
    else:
        raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
    if err != CLBlastSuccess:
        raise RuntimeError("PyCLBlast: 'CLBlastXhbmv' failed: %s" % get_status_message(err))
    # The event handle is a C pointer: hand it to pyopencl as an integer
    return cl.Event.from_int_ptr(<size_t>event)

####################################################################################################
# Hermitian packed matrix-vector multiplication: CHPMV/ZHPMV
####################################################################################################

cdef extern from "clblast_c.h":
    CLBlastStatusCode CLBlastChpmv(
        const CLBlastLayout layout, const CLBlastTriangle triangle,
        const size_t n, const cl_float2 alpha,
        const cl_mem ap_buffer, const size_t ap_offset,
        const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_float2 beta,
        cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
        cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastZhpmv(
        const CLBlastLayout layout, const CLBlastTriangle triangle,
        const size_t n, const cl_double2 alpha,
        const cl_mem ap_buffer, const size_t ap_offset,
        const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_double2 beta,
        cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
        cl_command_queue* queue, cl_event* event)

def hpmv(queue, n, ap, x, y, ap_ld, x_inc = 1, y_inc = 1, alpha = 1.0, beta = 0.0, lower_triangle = False, ap_offset = 0, x_offset = 0, y_offset = 0):
    """
    xHPMV: Hermitian packed matrix-vector multiplication.

    Calls CLBlastChpmv/Zhpmv, selected by the data-type that check_dtype returns for 'ap', 'x' and
    'y'. The layout is always passed as CLBlastLayoutRowMajor.

    queue:          command queue exposing 'int_ptr' (e.g. a pyopencl CommandQueue)
    n:              size, forwarded to CLBlast
    ap, x, y:       arrays exposing 'base_data.int_ptr'; 'y' is the buffer CLBlast writes to
    ap_ld:          accepted but not used: the CLBlast routine takes no leading dimension for 'ap'
    x_inc, y_inc:   vector increments, forwarded to CLBlast
    alpha, beta:    scalars; their '.real' and '.imag' parts are used
    lower_triangle: if true, CLBlastTriangleLower is passed, otherwise CLBlastTriangleUpper
    ap_offset, x_offset, y_offset: buffer offsets, forwarded to CLBlast

    Returns the pyopencl Event created from the event handle filled in by CLBlast. Raises
    ValueError for an unrecognized data-type and RuntimeError if CLBlast does not report success.
    """
    dtype = check_dtype([ap, x, y], ["complex64", "complex128"])
    check_matrix(ap, "ap")
    check_vector(x, "x")
    check_vector(y, "y")

    # 'int_ptr' is a Python integer: cast explicitly (via size_t) to obtain the OpenCL handles
    cdef cl_mem ap_buffer = <cl_mem><size_t>ap.base_data.int_ptr
    cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr
    cdef cl_mem y_buffer = <cl_mem><size_t>y.base_data.int_ptr

    cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
    cdef cl_event event = NULL
    triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper

    cdef CLBlastStatusCode err
    if dtype == np.dtype("complex64"):
        err = CLBlastChpmv(CLBlastLayoutRowMajor, triangle, n,
                           cl_float2(x=alpha.real, y=alpha.imag),
                           ap_buffer, ap_offset, x_buffer, x_offset, x_inc,
                           cl_float2(x=beta.real, y=beta.imag),
                           y_buffer, y_offset, y_inc, &command_queue, &event)
    elif dtype == np.dtype("complex128"):
        err = CLBlastZhpmv(CLBlastLayoutRowMajor, triangle, n,
                           cl_double2(x=alpha.real, y=alpha.imag),
                           ap_buffer, ap_offset, x_buffer, x_offset, x_inc,
                           cl_double2(x=beta.real, y=beta.imag),
                           y_buffer, y_offset, y_inc, &command_queue, &event)
    else:
        raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
    if err != CLBlastSuccess:
        raise RuntimeError("PyCLBlast: 'CLBlastXhpmv' failed: %s" % get_status_message(err))
    # The event handle is a C pointer: hand it to pyopencl as an integer
    return cl.Event.from_int_ptr(<size_t>event)

####################################################################################################
# Symmetric matrix-vector multiplication: SSYMV/DSYMV/HSYMV
####################################################################################################

cdef extern from "clblast_c.h":
    CLBlastStatusCode CLBlastSsymv(
        const CLBlastLayout layout, const CLBlastTriangle triangle,
        const size_t n, const float alpha,
        const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
        const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const float beta,
        cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
        cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastDsymv(
        const CLBlastLayout layout, const CLBlastTriangle triangle,
        const size_t n, const double alpha,
        const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
        const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const double beta,
        cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
        cl_command_queue* queue, cl_event* event)

def symv(queue, n, a, x, y, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, beta = 0.0, lower_triangle = False, a_offset = 0, x_offset = 0, y_offset = 0):
    """
    xSYMV: Symmetric matrix-vector multiplication.

    Calls CLBlastSsymv/Dsymv, selected by the data-type that check_dtype returns for 'a', 'x' and
    'y'. The layout is always passed as CLBlastLayoutRowMajor.

    queue:          command queue exposing 'int_ptr' (e.g. a pyopencl CommandQueue)
    n:              size, forwarded to CLBlast
    a, x, y:        arrays exposing 'base_data.int_ptr'; 'y' is the buffer CLBlast writes to
    a_ld:           leading dimension of 'a', forwarded to CLBlast
    x_inc, y_inc:   vector increments, forwarded to CLBlast
    alpha, beta:    scalars, forwarded to CLBlast
    lower_triangle: if true, CLBlastTriangleLower is passed, otherwise CLBlastTriangleUpper
    a_offset, x_offset, y_offset: buffer offsets, forwarded to CLBlast

    Returns the pyopencl Event created from the event handle filled in by CLBlast. Raises
    ValueError for an unrecognized data-type and RuntimeError if CLBlast does not report success.
    """
    dtype = check_dtype([a, x, y], ["float32", "float64"])
    check_matrix(a, "a")
    check_vector(x, "x")
    check_vector(y, "y")

    # 'int_ptr' is a Python integer: cast explicitly (via size_t) to obtain the OpenCL handles
    cdef cl_mem a_buffer = <cl_mem><size_t>a.base_data.int_ptr
    cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr
    cdef cl_mem y_buffer = <cl_mem><size_t>y.base_data.int_ptr

    cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
    cdef cl_event event = NULL
    triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper

    cdef CLBlastStatusCode err
    if dtype == np.dtype("float32"):
        err = CLBlastSsymv(CLBlastLayoutRowMajor, triangle, n, alpha,
                           a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, beta,
                           y_buffer, y_offset, y_inc, &command_queue, &event)
    elif dtype == np.dtype("float64"):
        err = CLBlastDsymv(CLBlastLayoutRowMajor, triangle, n, alpha,
                           a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, beta,
                           y_buffer, y_offset, y_inc, &command_queue, &event)
    else:
        raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
    if err != CLBlastSuccess:
        raise RuntimeError("PyCLBlast: 'CLBlastXsymv' failed: %s" % get_status_message(err))
    # The event handle is a C pointer: hand it to pyopencl as an integer
    return cl.Event.from_int_ptr(<size_t>event)

####################################################################################################
# Symmetric banded matrix-vector multiplication: SSBMV/DSBMV/HSBMV
####################################################################################################

cdef extern from "clblast_c.h":
    CLBlastStatusCode CLBlastSsbmv(
        const CLBlastLayout layout, const CLBlastTriangle triangle,
        const size_t n, const size_t k, const float alpha,
        const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
        const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const float beta,
        cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
        cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastDsbmv(
        const CLBlastLayout layout, const CLBlastTriangle triangle,
        const size_t n, const size_t k, const double alpha,
        const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
        const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const double beta,
        cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
        cl_command_queue* queue, cl_event* event)

def sbmv(queue, n, k, a, x, y, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, beta = 0.0, lower_triangle = False, a_offset = 0, x_offset = 0, y_offset = 0):
    """
    xSBMV: Symmetric banded matrix-vector multiplication.

    Calls CLBlastSsbmv/Dsbmv, selected by the data-type that check_dtype returns for 'a', 'x' and
    'y'. The layout is always passed as CLBlastLayoutRowMajor.

    queue:          command queue exposing 'int_ptr' (e.g. a pyopencl CommandQueue)
    n, k:           sizes, forwarded to CLBlast
    a, x, y:        arrays exposing 'base_data.int_ptr'; 'y' is the buffer CLBlast writes to
    a_ld:           leading dimension of 'a', forwarded to CLBlast
    x_inc, y_inc:   vector increments, forwarded to CLBlast
    alpha, beta:    scalars, forwarded to CLBlast
    lower_triangle: if true, CLBlastTriangleLower is passed, otherwise CLBlastTriangleUpper
    a_offset, x_offset, y_offset: buffer offsets, forwarded to CLBlast

    Returns the pyopencl Event created from the event handle filled in by CLBlast. Raises
    ValueError for an unrecognized data-type and RuntimeError if CLBlast does not report success.
    """
    dtype = check_dtype([a, x, y], ["float32", "float64"])
    check_matrix(a, "a")
    check_vector(x, "x")
    check_vector(y, "y")

    # 'int_ptr' is a Python integer: cast explicitly (via size_t) to obtain the OpenCL handles
    cdef cl_mem a_buffer = <cl_mem><size_t>a.base_data.int_ptr
    cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr
    cdef cl_mem y_buffer = <cl_mem><size_t>y.base_data.int_ptr

    cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
    cdef cl_event event = NULL
    triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper

    cdef CLBlastStatusCode err
    if dtype == np.dtype("float32"):
        err = CLBlastSsbmv(CLBlastLayoutRowMajor, triangle, n, k, alpha,
                           a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, beta,
                           y_buffer, y_offset, y_inc, &command_queue, &event)
    elif dtype == np.dtype("float64"):
        err = CLBlastDsbmv(CLBlastLayoutRowMajor, triangle, n, k, alpha,
                           a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, beta,
                           y_buffer, y_offset, y_inc, &command_queue, &event)
    else:
        raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
    if err != CLBlastSuccess:
        raise RuntimeError("PyCLBlast: 'CLBlastXsbmv' failed: %s" % get_status_message(err))
    # The event handle is a C pointer: hand it to pyopencl as an integer
    return cl.Event.from_int_ptr(<size_t>event)

####################################################################################################
# Symmetric packed matrix-vector multiplication: SSPMV/DSPMV/HSPMV
####################################################################################################

cdef extern from "clblast_c.h":
    CLBlastStatusCode CLBlastSspmv(
        const CLBlastLayout layout, const CLBlastTriangle triangle,
        const size_t n, const float alpha,
        const cl_mem ap_buffer, const size_t ap_offset,
        const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const float beta,
        cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
        cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastDspmv(
        const CLBlastLayout layout, const CLBlastTriangle triangle,
        const size_t n, const double alpha,
        const cl_mem ap_buffer, const size_t ap_offset,
        const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const double beta,
        cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
        cl_command_queue* queue, cl_event* event)

def spmv(queue, n, ap, x, y, ap_ld, x_inc = 1, y_inc = 1, alpha = 1.0, beta = 0.0, lower_triangle = False, ap_offset = 0, x_offset = 0, y_offset = 0):
    """
    xSPMV: Symmetric packed matrix-vector multiplication.

    Calls CLBlastSspmv/Dspmv, selected by the data-type that check_dtype returns for 'ap', 'x' and
    'y'. The layout is always passed as CLBlastLayoutRowMajor.

    queue:          command queue exposing 'int_ptr' (e.g. a pyopencl CommandQueue)
    n:              size, forwarded to CLBlast
    ap, x, y:       arrays exposing 'base_data.int_ptr'; 'y' is the buffer CLBlast writes to
    ap_ld:          accepted but not used: the CLBlast routine takes no leading dimension for 'ap'
    x_inc, y_inc:   vector increments, forwarded to CLBlast
    alpha, beta:    scalars, forwarded to CLBlast
    lower_triangle: if true, CLBlastTriangleLower is passed, otherwise CLBlastTriangleUpper
    ap_offset, x_offset, y_offset: buffer offsets, forwarded to CLBlast

    Returns the pyopencl Event created from the event handle filled in by CLBlast. Raises
    ValueError for an unrecognized data-type and RuntimeError if CLBlast does not report success.
    """
    dtype = check_dtype([ap, x, y], ["float32", "float64"])
    check_matrix(ap, "ap")
    check_vector(x, "x")
    check_vector(y, "y")

    # 'int_ptr' is a Python integer: cast explicitly (via size_t) to obtain the OpenCL handles
    cdef cl_mem ap_buffer = <cl_mem><size_t>ap.base_data.int_ptr
    cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr
    cdef cl_mem y_buffer = <cl_mem><size_t>y.base_data.int_ptr

    cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
    cdef cl_event event = NULL
    triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper

    cdef CLBlastStatusCode err
    if dtype == np.dtype("float32"):
        err = CLBlastSspmv(CLBlastLayoutRowMajor, triangle, n, alpha,
                           ap_buffer, ap_offset, x_buffer, x_offset, x_inc, beta,
                           y_buffer, y_offset, y_inc, &command_queue, &event)
    elif dtype == np.dtype("float64"):
        err = CLBlastDspmv(CLBlastLayoutRowMajor, triangle, n, alpha,
                           ap_buffer, ap_offset, x_buffer, x_offset, x_inc, beta,
                           y_buffer, y_offset, y_inc, &command_queue, &event)
    else:
        raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
    if err != CLBlastSuccess:
        raise RuntimeError("PyCLBlast: 'CLBlastXspmv' failed: %s" % get_status_message(err))
    # The event handle is a C pointer: hand it to pyopencl as an integer
    return cl.Event.from_int_ptr(<size_t>event)

####################################################################################################
# Triangular matrix-vector multiplication: STRMV/DTRMV/CTRMV/ZTRMV/HTRMV
####################################################################################################

cdef extern from "clblast_c.h":
    CLBlastStatusCode CLBlastStrmv(
        const CLBlastLayout layout, const CLBlastTriangle triangle,
        const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n,
        const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
        cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
        cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastDtrmv(
        const CLBlastLayout layout, const CLBlastTriangle triangle,
        const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n,
        const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
        cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
        cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastCtrmv(
        const CLBlastLayout layout, const CLBlastTriangle triangle,
        const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n,
        const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
        cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
        cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastZtrmv(
        const CLBlastLayout layout, const CLBlastTriangle triangle,
        const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n,
        const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
        cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
        cl_command_queue* queue, cl_event* event)

def trmv(queue, n, a, x, a_ld, x_inc = 1, lower_triangle = False, a_transp = False, unit_diagonal = False, a_offset = 0, x_offset = 0):
    """
    xTRMV: Triangular matrix-vector multiplication.

    Calls CLBlastStrmv/Dtrmv/Ctrmv/Ztrmv, selected by the data-type that check_dtype returns for
    'a' and 'x'. The layout is always passed as CLBlastLayoutRowMajor.

    queue:          command queue exposing 'int_ptr' (e.g. a pyopencl CommandQueue)
    n:              size, forwarded to CLBlast
    a, x:           arrays exposing 'base_data.int_ptr'; 'x' is the buffer CLBlast writes to
    a_ld:           leading dimension of 'a', forwarded to CLBlast
    x_inc:          vector increment, forwarded to CLBlast
    lower_triangle: if true, CLBlastTriangleLower is passed, otherwise CLBlastTriangleUpper
    a_transp:       if true, CLBlastTransposeYes is passed, otherwise CLBlastTransposeNo
    unit_diagonal:  if true, CLBlastDiagonalUnit is passed, otherwise CLBlastDiagonalNonUnit
    a_offset, x_offset: buffer offsets, forwarded to CLBlast

    Returns the pyopencl Event created from the event handle filled in by CLBlast. Raises
    ValueError for an unrecognized data-type and RuntimeError if CLBlast does not report success.
    """
    dtype = check_dtype([a, x], ["float32", "float64", "complex64", "complex128"])
    check_matrix(a, "a")
    check_vector(x, "x")

    # 'int_ptr' is a Python integer: cast explicitly (via size_t) to obtain the OpenCL handles
    cdef cl_mem a_buffer = <cl_mem><size_t>a.base_data.int_ptr
    cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr

    cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
    cdef cl_event event = NULL
    triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper
    a_transpose = CLBlastTransposeYes if a_transp else CLBlastTransposeNo
    diagonal = CLBlastDiagonalUnit if unit_diagonal else CLBlastDiagonalNonUnit

    cdef CLBlastStatusCode err
    if dtype == np.dtype("float32"):
        err = CLBlastStrmv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n,
                           a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc,
                           &command_queue, &event)
    elif dtype == np.dtype("float64"):
        err = CLBlastDtrmv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n,
                           a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc,
                           &command_queue, &event)
    elif dtype == np.dtype("complex64"):
        err = CLBlastCtrmv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n,
                           a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc,
                           &command_queue, &event)
    elif dtype == np.dtype("complex128"):
        err = CLBlastZtrmv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n,
                           a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc,
                           &command_queue, &event)
    else:
        raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
    if err != CLBlastSuccess:
        raise RuntimeError("PyCLBlast: 'CLBlastXtrmv' failed: %s" % get_status_message(err))
    # The event handle is a C pointer: hand it to pyopencl as an integer
    return cl.Event.from_int_ptr(<size_t>event)

+#################################################################################################### +# Triangular banded matrix-vector multiplication: STBMV/DTBMV/CTBMV/ZTBMV/HTBMV +#################################################################################################### + +cdef extern from "clblast_c.h": + CLBlastStatusCode CLBlastStbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const size_t k, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastDtbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const size_t k, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastCtbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const size_t k, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastZtbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const size_t k, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) + +def tbmv(queue, n, k, a, x, a_ld, x_inc = 1, lower_triangle = False, a_transp = False, unit_diagonal = False, a_offset = 0, x_offset = 0): + dtype = check_dtype([a, x], ["float32", "float64", "complex64", "complex128"]) + check_matrix(a, 
"a") + check_vector(x, "x") + + cdef cl_mem a_buffer = a.base_data.int_ptr + cdef cl_mem x_buffer = x.base_data.int_ptr + + cdef cl_command_queue command_queue = queue.int_ptr + cdef cl_event event = NULL + triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper + a_transpose = CLBlastTransposeYes if a_transp else CLBlastTransposeNo + diagonal = CLBlastDiagonalUnit if unit_diagonal else CLBlastDiagonalNonUnit + + cdef CLBlastStatusCode err + if dtype == np.dtype("float32"): + err = CLBlastStbmv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n, k, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, &command_queue, &event) + elif dtype == np.dtype("float64"): + err = CLBlastDtbmv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n, k, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, &command_queue, &event) + elif dtype == np.dtype("complex64"): + err = CLBlastCtbmv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n, k, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, &command_queue, &event) + elif dtype == np.dtype("complex128"): + err = CLBlastZtbmv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n, k, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, &command_queue, &event) + else: + raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) + if err != CLBlastSuccess: + raise RuntimeError("PyCLBlast: 'CLBlastXtbmv' failed: %s" % get_status_message(err)) + return cl.Event.from_int_ptr(event) + +#################################################################################################### +# Triangular packed matrix-vector multiplication: STPMV/DTPMV/CTPMV/ZTPMV/HTPMV +#################################################################################################### + +cdef extern from "clblast_c.h": + CLBlastStatusCode CLBlastStpmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const cl_mem 
def tpmv(queue, n, ap, x, ap_ld, x_inc=1, lower_triangle=False, a_transp=False,
         unit_diagonal=False, ap_offset=0, x_offset=0):
    """
    Enqueue a triangular packed matrix-vector multiplication (xTPMV) on `queue`.

    The CLBlast routine (S/D/C/Z) is picked from the dtype returned by `check_dtype` for
    `ap` and `x`. The call always uses the row-major layout. `ap_ld` is part of the
    signature but is not forwarded to CLBlast by this wrapper.

    Returns the pyopencl event built from the event handle written by CLBlast.
    Raises ValueError when the dtype matches none of the four handled types, and
    RuntimeError when CLBlast reports anything other than CLBlastSuccess.
    """
    dtype = check_dtype([ap, x], ["float32", "float64", "complex64", "complex128"])
    check_matrix(ap, "ap")
    check_vector(x, "x")

    cdef:
        cl_mem ap_buffer = ap.base_data.int_ptr
        cl_mem x_buffer = x.base_data.int_ptr
        cl_command_queue cq = queue.int_ptr
        cl_event event = NULL
        CLBlastStatusCode status

    # Translate the boolean keyword flags into the CLBlast enumeration values.
    if lower_triangle:
        triangle = CLBlastTriangleLower
    else:
        triangle = CLBlastTriangleUpper
    if a_transp:
        a_transpose = CLBlastTransposeYes
    else:
        a_transpose = CLBlastTransposeNo
    if unit_diagonal:
        diagonal = CLBlastDiagonalUnit
    else:
        diagonal = CLBlastDiagonalNonUnit

    if dtype == np.dtype("float32"):
        status = CLBlastStpmv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n,
                              ap_buffer, ap_offset, x_buffer, x_offset, x_inc,
                              &cq, &event)
    elif dtype == np.dtype("float64"):
        status = CLBlastDtpmv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n,
                              ap_buffer, ap_offset, x_buffer, x_offset, x_inc,
                              &cq, &event)
    elif dtype == np.dtype("complex64"):
        status = CLBlastCtpmv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n,
                              ap_buffer, ap_offset, x_buffer, x_offset, x_inc,
                              &cq, &event)
    elif dtype == np.dtype("complex128"):
        status = CLBlastZtpmv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n,
                              ap_buffer, ap_offset, x_buffer, x_offset, x_inc,
                              &cq, &event)
    else:
        raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)

    if status == CLBlastSuccess:
        return cl.Event.from_int_ptr(event)
    raise RuntimeError("PyCLBlast: 'CLBlastXtpmv' failed: %s" % get_status_message(status))
def trsv(queue, n, a, x, a_ld, x_inc=1, lower_triangle=False, a_transp=False,
         unit_diagonal=False, a_offset=0, x_offset=0):
    """
    Enqueue the solve of a triangular system of equations (xTRSV) on `queue`.

    The S/D/C/Z routine is picked from the dtype returned by `check_dtype` for `a` and
    `x`; the call always uses the row-major layout. Returns the pyopencl event built from
    the handle written by CLBlast. Raises ValueError for an unhandled dtype and
    RuntimeError when CLBlast does not report CLBlastSuccess.
    """
    dtype = check_dtype([a, x], ["float32", "float64", "complex64", "complex128"])
    check_matrix(a, "a")
    check_vector(x, "x")

    cdef:
        cl_mem a_buffer = a.base_data.int_ptr
        cl_mem x_buffer = x.base_data.int_ptr
        cl_command_queue cq = queue.int_ptr
        cl_event event = NULL
        CLBlastStatusCode status

    # Translate the boolean keyword flags into the CLBlast enumeration values.
    if lower_triangle:
        triangle = CLBlastTriangleLower
    else:
        triangle = CLBlastTriangleUpper
    if a_transp:
        a_transpose = CLBlastTransposeYes
    else:
        a_transpose = CLBlastTransposeNo
    if unit_diagonal:
        diagonal = CLBlastDiagonalUnit
    else:
        diagonal = CLBlastDiagonalNonUnit

    if dtype == np.dtype("float32"):
        status = CLBlastStrsv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n,
                              a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc,
                              &cq, &event)
    elif dtype == np.dtype("float64"):
        status = CLBlastDtrsv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n,
                              a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc,
                              &cq, &event)
    elif dtype == np.dtype("complex64"):
        status = CLBlastCtrsv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n,
                              a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc,
                              &cq, &event)
    elif dtype == np.dtype("complex128"):
        status = CLBlastZtrsv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n,
                              a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc,
                              &cq, &event)
    else:
        raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)

    if status == CLBlastSuccess:
        return cl.Event.from_int_ptr(event)
    raise RuntimeError("PyCLBlast: 'CLBlastXtrsv' failed: %s" % get_status_message(status))


def ger(queue, m, n, x, y, a, a_ld, x_inc=1, y_inc=1, alpha=1.0, x_offset=0, y_offset=0,
        a_offset=0):
    """
    Enqueue a general rank-1 matrix update (SGER/DGER) on `queue`.

    Only float32 and float64 are dispatched; the call always uses the row-major layout.
    Returns the pyopencl event built from the handle written by CLBlast. Raises
    ValueError for an unhandled dtype and RuntimeError when CLBlast does not report
    CLBlastSuccess.
    """
    dtype = check_dtype([x, y, a], ["float32", "float64"])
    check_vector(x, "x")
    check_vector(y, "y")
    check_matrix(a, "a")

    cdef:
        cl_mem x_buffer = x.base_data.int_ptr
        cl_mem y_buffer = y.base_data.int_ptr
        cl_mem a_buffer = a.base_data.int_ptr
        cl_command_queue cq = queue.int_ptr
        cl_event event = NULL
        CLBlastStatusCode status

    if dtype == np.dtype("float32"):
        status = CLBlastSger(CLBlastLayoutRowMajor, m, n, alpha,
                             x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc,
                             a_buffer, a_offset, a_ld, &cq, &event)
    elif dtype == np.dtype("float64"):
        status = CLBlastDger(CLBlastLayoutRowMajor, m, n, alpha,
                             x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc,
                             a_buffer, a_offset, a_ld, &cq, &event)
    else:
        raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)

    if status == CLBlastSuccess:
        return cl.Event.from_int_ptr(event)
    raise RuntimeError("PyCLBlast: 'CLBlastXger' failed: %s" % get_status_message(status))


def geru(queue, m, n, x, y, a, a_ld, x_inc=1, y_inc=1, alpha=1.0, x_offset=0, y_offset=0,
         a_offset=0):
    """
    Enqueue a general rank-1 complex matrix update (CGERU/ZGERU) on `queue`.

    Only complex64 and complex128 are dispatched. `alpha` is split into its `.real` and
    `.imag` parts to fill the cl_float2 / cl_double2 scalar. The call always uses the
    row-major layout. Returns the pyopencl event built from the handle written by
    CLBlast. Raises ValueError for an unhandled dtype and RuntimeError when CLBlast does
    not report CLBlastSuccess.
    """
    dtype = check_dtype([x, y, a], ["complex64", "complex128"])
    check_vector(x, "x")
    check_vector(y, "y")
    check_matrix(a, "a")

    cdef:
        cl_mem x_buffer = x.base_data.int_ptr
        cl_mem y_buffer = y.base_data.int_ptr
        cl_mem a_buffer = a.base_data.int_ptr
        cl_command_queue cq = queue.int_ptr
        cl_event event = NULL
        CLBlastStatusCode status

    if dtype == np.dtype("complex64"):
        status = CLBlastCgeru(CLBlastLayoutRowMajor, m, n,
                              cl_float2(x=alpha.real, y=alpha.imag),
                              x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc,
                              a_buffer, a_offset, a_ld, &cq, &event)
    elif dtype == np.dtype("complex128"):
        status = CLBlastZgeru(CLBlastLayoutRowMajor, m, n,
                              cl_double2(x=alpha.real, y=alpha.imag),
                              x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc,
                              a_buffer, a_offset, a_ld, &cq, &event)
    else:
        raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)

    if status == CLBlastSuccess:
        return cl.Event.from_int_ptr(event)
    raise RuntimeError("PyCLBlast: 'CLBlastXgeru' failed: %s" % get_status_message(status))


def gerc(queue, m, n, x, y, a, a_ld, x_inc=1, y_inc=1, alpha=1.0, x_offset=0, y_offset=0,
         a_offset=0):
    """
    Enqueue a general rank-1 complex conjugated matrix update (CGERC/ZGERC) on `queue`.

    Only complex64 and complex128 are dispatched. `alpha` is split into its `.real` and
    `.imag` parts to fill the cl_float2 / cl_double2 scalar. The call always uses the
    row-major layout. Returns the pyopencl event built from the handle written by
    CLBlast. Raises ValueError for an unhandled dtype and RuntimeError when CLBlast does
    not report CLBlastSuccess.
    """
    dtype = check_dtype([x, y, a], ["complex64", "complex128"])
    check_vector(x, "x")
    check_vector(y, "y")
    check_matrix(a, "a")

    cdef:
        cl_mem x_buffer = x.base_data.int_ptr
        cl_mem y_buffer = y.base_data.int_ptr
        cl_mem a_buffer = a.base_data.int_ptr
        cl_command_queue cq = queue.int_ptr
        cl_event event = NULL
        CLBlastStatusCode status

    if dtype == np.dtype("complex64"):
        status = CLBlastCgerc(CLBlastLayoutRowMajor, m, n,
                              cl_float2(x=alpha.real, y=alpha.imag),
                              x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc,
                              a_buffer, a_offset, a_ld, &cq, &event)
    elif dtype == np.dtype("complex128"):
        status = CLBlastZgerc(CLBlastLayoutRowMajor, m, n,
                              cl_double2(x=alpha.real, y=alpha.imag),
                              x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc,
                              a_buffer, a_offset, a_ld, &cq, &event)
    else:
        raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)

    if status == CLBlastSuccess:
        return cl.Event.from_int_ptr(event)
    raise RuntimeError("PyCLBlast: 'CLBlastXgerc' failed: %s" % get_status_message(status))


def her(queue, n, x, a, a_ld, x_inc=1, alpha=1.0, lower_triangle=False, x_offset=0,
        a_offset=0):
    """
    Enqueue a Hermitian rank-1 matrix update (CHER/ZHER) on `queue`.

    Only complex64 and complex128 are dispatched; `alpha` is handed to CLBlast as a real
    scalar (float / double in the C declarations). The call always uses the row-major
    layout. Returns the pyopencl event built from the handle written by CLBlast. Raises
    ValueError for an unhandled dtype and RuntimeError when CLBlast does not report
    CLBlastSuccess.
    """
    dtype = check_dtype([x, a], ["complex64", "complex128"])
    check_vector(x, "x")
    check_matrix(a, "a")

    cdef:
        cl_mem x_buffer = x.base_data.int_ptr
        cl_mem a_buffer = a.base_data.int_ptr
        cl_command_queue cq = queue.int_ptr
        cl_event event = NULL
        CLBlastStatusCode status

    if lower_triangle:
        triangle = CLBlastTriangleLower
    else:
        triangle = CLBlastTriangleUpper

    if dtype == np.dtype("complex64"):
        status = CLBlastCher(CLBlastLayoutRowMajor, triangle, n, alpha,
                             x_buffer, x_offset, x_inc, a_buffer, a_offset, a_ld,
                             &cq, &event)
    elif dtype == np.dtype("complex128"):
        status = CLBlastZher(CLBlastLayoutRowMajor, triangle, n, alpha,
                             x_buffer, x_offset, x_inc, a_buffer, a_offset, a_ld,
                             &cq, &event)
    else:
        raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)

    if status == CLBlastSuccess:
        return cl.Event.from_int_ptr(event)
    raise RuntimeError("PyCLBlast: 'CLBlastXher' failed: %s" % get_status_message(status))


def hpr(queue, n, x, ap, ap_ld, x_inc=1, alpha=1.0, lower_triangle=False, x_offset=0,
        ap_offset=0):
    """
    Enqueue a Hermitian packed rank-1 matrix update (CHPR/ZHPR) on `queue`.

    Only complex64 and complex128 are dispatched; `alpha` is handed to CLBlast as a real
    scalar. `ap_ld` is part of the signature but is not forwarded to CLBlast by this
    wrapper. The call always uses the row-major layout. Returns the pyopencl event built
    from the handle written by CLBlast. Raises ValueError for an unhandled dtype and
    RuntimeError when CLBlast does not report CLBlastSuccess.
    """
    dtype = check_dtype([x, ap], ["complex64", "complex128"])
    check_vector(x, "x")
    check_matrix(ap, "ap")

    cdef:
        cl_mem x_buffer = x.base_data.int_ptr
        cl_mem ap_buffer = ap.base_data.int_ptr
        cl_command_queue cq = queue.int_ptr
        cl_event event = NULL
        CLBlastStatusCode status

    if lower_triangle:
        triangle = CLBlastTriangleLower
    else:
        triangle = CLBlastTriangleUpper

    if dtype == np.dtype("complex64"):
        status = CLBlastChpr(CLBlastLayoutRowMajor, triangle, n, alpha,
                             x_buffer, x_offset, x_inc, ap_buffer, ap_offset,
                             &cq, &event)
    elif dtype == np.dtype("complex128"):
        status = CLBlastZhpr(CLBlastLayoutRowMajor, triangle, n, alpha,
                             x_buffer, x_offset, x_inc, ap_buffer, ap_offset,
                             &cq, &event)
    else:
        raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)

    if status == CLBlastSuccess:
        return cl.Event.from_int_ptr(event)
    raise RuntimeError("PyCLBlast: 'CLBlastXhpr' failed: %s" % get_status_message(status))


def her2(queue, n, x, y, a, a_ld, x_inc=1, y_inc=1, alpha=1.0, lower_triangle=False,
         x_offset=0, y_offset=0, a_offset=0):
    """
    Enqueue a Hermitian rank-2 matrix update (CHER2/ZHER2) on `queue`.

    Only complex64 and complex128 are dispatched. `alpha` is split into its `.real` and
    `.imag` parts to fill the cl_float2 / cl_double2 scalar. The call always uses the
    row-major layout. Returns the pyopencl event built from the handle written by
    CLBlast. Raises ValueError for an unhandled dtype and RuntimeError when CLBlast does
    not report CLBlastSuccess.
    """
    dtype = check_dtype([x, y, a], ["complex64", "complex128"])
    check_vector(x, "x")
    check_vector(y, "y")
    check_matrix(a, "a")

    cdef:
        cl_mem x_buffer = x.base_data.int_ptr
        cl_mem y_buffer = y.base_data.int_ptr
        cl_mem a_buffer = a.base_data.int_ptr
        cl_command_queue cq = queue.int_ptr
        cl_event event = NULL
        CLBlastStatusCode status

    if lower_triangle:
        triangle = CLBlastTriangleLower
    else:
        triangle = CLBlastTriangleUpper

    if dtype == np.dtype("complex64"):
        status = CLBlastCher2(CLBlastLayoutRowMajor, triangle, n,
                              cl_float2(x=alpha.real, y=alpha.imag),
                              x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc,
                              a_buffer, a_offset, a_ld, &cq, &event)
    elif dtype == np.dtype("complex128"):
        status = CLBlastZher2(CLBlastLayoutRowMajor, triangle, n,
                              cl_double2(x=alpha.real, y=alpha.imag),
                              x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc,
                              a_buffer, a_offset, a_ld, &cq, &event)
    else:
        raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)

    if status == CLBlastSuccess:
        return cl.Event.from_int_ptr(event)
    raise RuntimeError("PyCLBlast: 'CLBlastXher2' failed: %s" % get_status_message(status))


def hpr2(queue, n, x, y, ap, ap_ld, x_inc=1, y_inc=1, alpha=1.0, lower_triangle=False,
         x_offset=0, y_offset=0, ap_offset=0):
    """
    Enqueue a Hermitian packed rank-2 matrix update (CHPR2/ZHPR2) on `queue`.

    Only complex64 and complex128 are dispatched. `alpha` is split into its `.real` and
    `.imag` parts to fill the cl_float2 / cl_double2 scalar. `ap_ld` is part of the
    signature but is not forwarded to CLBlast by this wrapper. The call always uses the
    row-major layout. Returns the pyopencl event built from the handle written by
    CLBlast. Raises ValueError for an unhandled dtype and RuntimeError when CLBlast does
    not report CLBlastSuccess.
    """
    dtype = check_dtype([x, y, ap], ["complex64", "complex128"])
    check_vector(x, "x")
    check_vector(y, "y")
    check_matrix(ap, "ap")

    cdef:
        cl_mem x_buffer = x.base_data.int_ptr
        cl_mem y_buffer = y.base_data.int_ptr
        cl_mem ap_buffer = ap.base_data.int_ptr
        cl_command_queue cq = queue.int_ptr
        cl_event event = NULL
        CLBlastStatusCode status

    if lower_triangle:
        triangle = CLBlastTriangleLower
    else:
        triangle = CLBlastTriangleUpper

    if dtype == np.dtype("complex64"):
        status = CLBlastChpr2(CLBlastLayoutRowMajor, triangle, n,
                              cl_float2(x=alpha.real, y=alpha.imag),
                              x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc,
                              ap_buffer, ap_offset, &cq, &event)
    elif dtype == np.dtype("complex128"):
        status = CLBlastZhpr2(CLBlastLayoutRowMajor, triangle, n,
                              cl_double2(x=alpha.real, y=alpha.imag),
                              x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc,
                              ap_buffer, ap_offset, &cq, &event)
    else:
        raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)

    if status == CLBlastSuccess:
        return cl.Event.from_int_ptr(event)
    raise RuntimeError("PyCLBlast: 'CLBlastXhpr2' failed: %s" % get_status_message(status))
def syr(queue, n, x, a, a_ld, x_inc=1, alpha=1.0, lower_triangle=False, x_offset=0,
        a_offset=0):
    """
    Enqueue a symmetric rank-1 matrix update (SSYR/DSYR) on `queue`.

    Only float32 and float64 are dispatched; the call always uses the row-major layout.
    Returns the pyopencl event built from the handle written by CLBlast. Raises
    ValueError for an unhandled dtype and RuntimeError when CLBlast does not report
    CLBlastSuccess.
    """
    dtype = check_dtype([x, a], ["float32", "float64"])
    check_vector(x, "x")
    check_matrix(a, "a")

    cdef:
        cl_mem x_buffer = x.base_data.int_ptr
        cl_mem a_buffer = a.base_data.int_ptr
        cl_command_queue cq = queue.int_ptr
        cl_event event = NULL
        CLBlastStatusCode status

    if lower_triangle:
        triangle = CLBlastTriangleLower
    else:
        triangle = CLBlastTriangleUpper

    if dtype == np.dtype("float32"):
        status = CLBlastSsyr(CLBlastLayoutRowMajor, triangle, n, alpha,
                             x_buffer, x_offset, x_inc, a_buffer, a_offset, a_ld,
                             &cq, &event)
    elif dtype == np.dtype("float64"):
        status = CLBlastDsyr(CLBlastLayoutRowMajor, triangle, n, alpha,
                             x_buffer, x_offset, x_inc, a_buffer, a_offset, a_ld,
                             &cq, &event)
    else:
        raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)

    if status == CLBlastSuccess:
        return cl.Event.from_int_ptr(event)
    raise RuntimeError("PyCLBlast: 'CLBlastXsyr' failed: %s" % get_status_message(status))
def spr(queue, n, x, ap, ap_ld, x_inc=1, alpha=1.0, lower_triangle=False, x_offset=0,
        ap_offset=0):
    """
    Enqueue a symmetric packed rank-1 matrix update (SSPR/DSPR) on `queue`.

    Only float32 and float64 are dispatched; the call always uses the row-major layout.
    `ap_ld` is part of the signature but is not forwarded to CLBlast by this wrapper.
    Returns the pyopencl event built from the handle written by CLBlast. Raises
    ValueError for an unhandled dtype and RuntimeError when CLBlast does not report
    CLBlastSuccess.
    """
    dtype = check_dtype([x, ap], ["float32", "float64"])
    check_vector(x, "x")
    check_matrix(ap, "ap")

    cdef:
        cl_mem x_buffer = x.base_data.int_ptr
        cl_mem ap_buffer = ap.base_data.int_ptr
        cl_command_queue cq = queue.int_ptr
        cl_event event = NULL
        CLBlastStatusCode status

    if lower_triangle:
        triangle = CLBlastTriangleLower
    else:
        triangle = CLBlastTriangleUpper

    if dtype == np.dtype("float32"):
        status = CLBlastSspr(CLBlastLayoutRowMajor, triangle, n, alpha,
                             x_buffer, x_offset, x_inc, ap_buffer, ap_offset,
                             &cq, &event)
    elif dtype == np.dtype("float64"):
        status = CLBlastDspr(CLBlastLayoutRowMajor, triangle, n, alpha,
                             x_buffer, x_offset, x_inc, ap_buffer, ap_offset,
                             &cq, &event)
    else:
        raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)

    if status == CLBlastSuccess:
        return cl.Event.from_int_ptr(event)
    raise RuntimeError("PyCLBlast: 'CLBlastXspr' failed: %s" % get_status_message(status))
def syr2(queue, n, x, y, a, a_ld, x_inc=1, y_inc=1, alpha=1.0, lower_triangle=False,
         x_offset=0, y_offset=0, a_offset=0):
    """
    Enqueue a symmetric rank-2 matrix update (SSYR2/DSYR2) on `queue`.

    Only float32 and float64 are dispatched; the call always uses the row-major layout.
    Returns the pyopencl event built from the handle written by CLBlast. Raises
    ValueError for an unhandled dtype and RuntimeError when CLBlast does not report
    CLBlastSuccess.
    """
    dtype = check_dtype([x, y, a], ["float32", "float64"])
    check_vector(x, "x")
    check_vector(y, "y")
    check_matrix(a, "a")

    cdef:
        cl_mem x_buffer = x.base_data.int_ptr
        cl_mem y_buffer = y.base_data.int_ptr
        cl_mem a_buffer = a.base_data.int_ptr
        cl_command_queue cq = queue.int_ptr
        cl_event event = NULL
        CLBlastStatusCode status

    if lower_triangle:
        triangle = CLBlastTriangleLower
    else:
        triangle = CLBlastTriangleUpper

    if dtype == np.dtype("float32"):
        status = CLBlastSsyr2(CLBlastLayoutRowMajor, triangle, n, alpha,
                              x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc,
                              a_buffer, a_offset, a_ld, &cq, &event)
    elif dtype == np.dtype("float64"):
        status = CLBlastDsyr2(CLBlastLayoutRowMajor, triangle, n, alpha,
                              x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc,
                              a_buffer, a_offset, a_ld, &cq, &event)
    else:
        raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)

    if status == CLBlastSuccess:
        return cl.Event.from_int_ptr(event)
    raise RuntimeError("PyCLBlast: 'CLBlastXsyr2' failed: %s" % get_status_message(status))
def spr2(queue, n, x, y, ap, ap_ld, x_inc=1, y_inc=1, alpha=1.0, lower_triangle=False,
         x_offset=0, y_offset=0, ap_offset=0):
    """
    Enqueue a symmetric packed rank-2 matrix update (SSPR2/DSPR2) on `queue`.

    Only float32 and float64 are dispatched; the call always uses the row-major layout.
    `ap_ld` is part of the signature but is not forwarded to CLBlast by this wrapper.
    Returns the pyopencl event built from the handle written by CLBlast. Raises
    ValueError for an unhandled dtype and RuntimeError when CLBlast does not report
    CLBlastSuccess.
    """
    dtype = check_dtype([x, y, ap], ["float32", "float64"])
    check_vector(x, "x")
    check_vector(y, "y")
    check_matrix(ap, "ap")

    cdef:
        cl_mem x_buffer = x.base_data.int_ptr
        cl_mem y_buffer = y.base_data.int_ptr
        cl_mem ap_buffer = ap.base_data.int_ptr
        cl_command_queue cq = queue.int_ptr
        cl_event event = NULL
        CLBlastStatusCode status

    if lower_triangle:
        triangle = CLBlastTriangleLower
    else:
        triangle = CLBlastTriangleUpper

    if dtype == np.dtype("float32"):
        status = CLBlastSspr2(CLBlastLayoutRowMajor, triangle, n, alpha,
                              x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc,
                              ap_buffer, ap_offset, &cq, &event)
    elif dtype == np.dtype("float64"):
        status = CLBlastDspr2(CLBlastLayoutRowMajor, triangle, n, alpha,
                              x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc,
                              ap_buffer, ap_offset, &cq, &event)
    else:
        raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)

    if status == CLBlastSuccess:
        return cl.Event.from_int_ptr(event)
    raise RuntimeError("PyCLBlast: 'CLBlastXspr2' failed: %s" % get_status_message(status))
0.0, a_t return cl.Event.from_int_ptr(event) #################################################################################################### +# Symmetric matrix-matrix multiplication: SSYMM/DSYMM/CSYMM/ZSYMM/HSYMM +#################################################################################################### + +cdef extern from "clblast_c.h": + CLBlastStatusCode CLBlastSsymm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const size_t m, const size_t n, const float alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const float beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastDsymm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const size_t m, const size_t n, const double alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const double beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastCsymm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const size_t m, const size_t n, const cl_float2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const cl_float2 beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastZsymm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const size_t m, const size_t n, const cl_double2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const cl_double2 beta, cl_mem c_buffer, const size_t c_offset, const size_t 
def symm(queue, m, n, a, b, c, a_ld, b_ld, c_ld, alpha=1.0, beta=0.0, right_side=False,
         lower_triangle=False, a_offset=0, b_offset=0, c_offset=0):
    """
    Enqueue a symmetric matrix-matrix multiplication (xSYMM) on `queue`.

    The S/D/C/Z routine is picked from the dtype returned by `check_dtype` for `a`, `b`
    and `c`. For the complex variants `alpha` and `beta` are split into their `.real` and
    `.imag` parts to fill the cl_float2 / cl_double2 scalars. The call always uses the
    row-major layout. Returns the pyopencl event built from the handle written by
    CLBlast. Raises ValueError for an unhandled dtype and RuntimeError when CLBlast does
    not report CLBlastSuccess.
    """
    dtype = check_dtype([a, b, c], ["float32", "float64", "complex64", "complex128"])
    check_matrix(a, "a")
    check_matrix(b, "b")
    check_matrix(c, "c")

    cdef:
        cl_mem a_buffer = a.base_data.int_ptr
        cl_mem b_buffer = b.base_data.int_ptr
        cl_mem c_buffer = c.base_data.int_ptr
        cl_command_queue cq = queue.int_ptr
        cl_event event = NULL
        CLBlastStatusCode status

    # Translate the boolean keyword flags into the CLBlast enumeration values.
    if right_side:
        side = CLBlastSideRight
    else:
        side = CLBlastSideLeft
    if lower_triangle:
        triangle = CLBlastTriangleLower
    else:
        triangle = CLBlastTriangleUpper

    if dtype == np.dtype("float32"):
        status = CLBlastSsymm(CLBlastLayoutRowMajor, side, triangle, m, n, alpha,
                              a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, beta,
                              c_buffer, c_offset, c_ld, &cq, &event)
    elif dtype == np.dtype("float64"):
        status = CLBlastDsymm(CLBlastLayoutRowMajor, side, triangle, m, n, alpha,
                              a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, beta,
                              c_buffer, c_offset, c_ld, &cq, &event)
    elif dtype == np.dtype("complex64"):
        status = CLBlastCsymm(CLBlastLayoutRowMajor, side, triangle, m, n,
                              cl_float2(x=alpha.real, y=alpha.imag),
                              a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld,
                              cl_float2(x=beta.real, y=beta.imag),
                              c_buffer, c_offset, c_ld, &cq, &event)
    elif dtype == np.dtype("complex128"):
        status = CLBlastZsymm(CLBlastLayoutRowMajor, side, triangle, m, n,
                              cl_double2(x=alpha.real, y=alpha.imag),
                              a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld,
                              cl_double2(x=beta.real, y=beta.imag),
                              c_buffer, c_offset, c_ld, &cq, &event)
    else:
        raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)

    if status == CLBlastSuccess:
        return cl.Event.from_int_ptr(event)
    raise RuntimeError("PyCLBlast: 'CLBlastXsymm' failed: %s" % get_status_message(status))
def hemm(queue, m, n, a, b, c, a_ld, b_ld, c_ld, alpha=1.0, beta=0.0, right_side=False,
         lower_triangle=False, a_offset=0, b_offset=0, c_offset=0):
    """
    Enqueue a Hermitian matrix-matrix multiplication (CHEMM/ZHEMM) on `queue`.

    Only complex64 and complex128 are dispatched. `alpha` and `beta` are split into their
    `.real` and `.imag` parts to fill the cl_float2 / cl_double2 scalars. The call always
    uses the row-major layout. Returns the pyopencl event built from the handle written
    by CLBlast. Raises ValueError for an unhandled dtype and RuntimeError when CLBlast
    does not report CLBlastSuccess.
    """
    dtype = check_dtype([a, b, c], ["complex64", "complex128"])
    check_matrix(a, "a")
    check_matrix(b, "b")
    check_matrix(c, "c")

    cdef:
        cl_mem a_buffer = a.base_data.int_ptr
        cl_mem b_buffer = b.base_data.int_ptr
        cl_mem c_buffer = c.base_data.int_ptr
        cl_command_queue cq = queue.int_ptr
        cl_event event = NULL
        CLBlastStatusCode status

    # Translate the boolean keyword flags into the CLBlast enumeration values.
    if right_side:
        side = CLBlastSideRight
    else:
        side = CLBlastSideLeft
    if lower_triangle:
        triangle = CLBlastTriangleLower
    else:
        triangle = CLBlastTriangleUpper

    if dtype == np.dtype("complex64"):
        status = CLBlastChemm(CLBlastLayoutRowMajor, side, triangle, m, n,
                              cl_float2(x=alpha.real, y=alpha.imag),
                              a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld,
                              cl_float2(x=beta.real, y=beta.imag),
                              c_buffer, c_offset, c_ld, &cq, &event)
    elif dtype == np.dtype("complex128"):
        status = CLBlastZhemm(CLBlastLayoutRowMajor, side, triangle, m, n,
                              cl_double2(x=alpha.real, y=alpha.imag),
                              a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld,
                              cl_double2(x=beta.real, y=beta.imag),
                              c_buffer, c_offset, c_ld, &cq, &event)
    else:
        raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)

    if status == CLBlastSuccess:
        return cl.Event.from_int_ptr(event)
    raise RuntimeError("PyCLBlast: 'CLBlastXhemm' failed: %s" % get_status_message(status))
c_ld,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastZsyrk(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const size_t n, const size_t k, const cl_double2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_double2 beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event) + +def syrk(queue, n, k, a, c, a_ld, c_ld, alpha = 1.0, beta = 0.0, lower_triangle = False, a_transp = False, a_offset = 0, c_offset = 0): + dtype = check_dtype([a, c], ["float32", "float64", "complex64", "complex128"]) + check_matrix(a, "a") + check_matrix(c, "c") + + cdef cl_mem a_buffer = a.base_data.int_ptr + cdef cl_mem c_buffer = c.base_data.int_ptr + + cdef cl_command_queue command_queue = queue.int_ptr + cdef cl_event event = NULL + triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper + a_transpose = CLBlastTransposeYes if a_transp else CLBlastTransposeNo + + cdef CLBlastStatusCode err + if dtype == np.dtype("float32"): + err = CLBlastSsyrk(CLBlastLayoutRowMajor, triangle, a_transpose, n, k, alpha, a_buffer, a_offset, a_ld, beta, c_buffer, c_offset, c_ld, &command_queue, &event) + elif dtype == np.dtype("float64"): + err = CLBlastDsyrk(CLBlastLayoutRowMajor, triangle, a_transpose, n, k, alpha, a_buffer, a_offset, a_ld, beta, c_buffer, c_offset, c_ld, &command_queue, &event) + elif dtype == np.dtype("complex64"): + err = CLBlastCsyrk(CLBlastLayoutRowMajor, triangle, a_transpose, n, k, cl_float2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, cl_float2(x=beta.real,y=beta.imag), c_buffer, c_offset, c_ld, &command_queue, &event) + elif dtype == np.dtype("complex128"): + err = CLBlastZsyrk(CLBlastLayoutRowMajor, triangle, a_transpose, n, k, cl_double2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, cl_double2(x=beta.real,y=beta.imag), c_buffer, c_offset, c_ld, &command_queue, &event) + else: + raise 
ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) + if err != CLBlastSuccess: + raise RuntimeError("PyCLBlast: 'CLBlastXsyrk' failed: %s" % get_status_message(err)) + return cl.Event.from_int_ptr(event) + +#################################################################################################### +# Rank-K update of a hermitian matrix: CHERK/ZHERK +#################################################################################################### + +cdef extern from "clblast_c.h": + CLBlastStatusCode CLBlastCherk(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const size_t n, const size_t k, const float alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const float beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastZherk(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const size_t n, const size_t k, const double alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const double beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event) + +def herk(queue, n, k, a, c, a_ld, c_ld, alpha = 1.0, beta = 0.0, lower_triangle = False, a_transp = False, a_offset = 0, c_offset = 0): + dtype = check_dtype([a, c], ["complex64", "complex128"]) + check_matrix(a, "a") + check_matrix(c, "c") + + cdef cl_mem a_buffer = a.base_data.int_ptr + cdef cl_mem c_buffer = c.base_data.int_ptr + + cdef cl_command_queue command_queue = queue.int_ptr + cdef cl_event event = NULL + triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper + a_transpose = CLBlastTransposeYes if a_transp else CLBlastTransposeNo + + cdef CLBlastStatusCode err + if dtype == np.dtype("complex64"): + err = CLBlastCherk(CLBlastLayoutRowMajor, triangle, a_transpose, n, k, alpha, a_buffer, a_offset, a_ld, beta, c_buffer, 
c_offset, c_ld, &command_queue, &event) + elif dtype == np.dtype("complex128"): + err = CLBlastZherk(CLBlastLayoutRowMajor, triangle, a_transpose, n, k, alpha, a_buffer, a_offset, a_ld, beta, c_buffer, c_offset, c_ld, &command_queue, &event) + else: + raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) + if err != CLBlastSuccess: + raise RuntimeError("PyCLBlast: 'CLBlastXherk' failed: %s" % get_status_message(err)) + return cl.Event.from_int_ptr(event) + +#################################################################################################### +# Rank-2K update of a symmetric matrix: SSYR2K/DSYR2K/CSYR2K/ZSYR2K/HSYR2K +#################################################################################################### + +cdef extern from "clblast_c.h": + CLBlastStatusCode CLBlastSsyr2k(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose ab_transpose, const size_t n, const size_t k, const float alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const float beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastDsyr2k(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose ab_transpose, const size_t n, const size_t k, const double alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const double beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastCsyr2k(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose ab_transpose, const size_t n, const size_t k, const cl_float2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const cl_float2 beta, cl_mem 
c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastZsyr2k(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose ab_transpose, const size_t n, const size_t k, const cl_double2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const cl_double2 beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event) + +def syr2k(queue, n, k, a, b, c, a_ld, b_ld, c_ld, alpha = 1.0, beta = 0.0, lower_triangle = False, ab_transp = False, a_offset = 0, b_offset = 0, c_offset = 0): + dtype = check_dtype([a, b, c], ["float32", "float64", "complex64", "complex128"]) + check_matrix(a, "a") + check_matrix(b, "b") + check_matrix(c, "c") + + cdef cl_mem a_buffer = a.base_data.int_ptr + cdef cl_mem b_buffer = b.base_data.int_ptr + cdef cl_mem c_buffer = c.base_data.int_ptr + + cdef cl_command_queue command_queue = queue.int_ptr + cdef cl_event event = NULL + triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper + ab_transpose = CLBlastTransposeYes if ab_transp else CLBlastTransposeNo + + cdef CLBlastStatusCode err + if dtype == np.dtype("float32"): + err = CLBlastSsyr2k(CLBlastLayoutRowMajor, triangle, ab_transpose, n, k, alpha, a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, beta, c_buffer, c_offset, c_ld, &command_queue, &event) + elif dtype == np.dtype("float64"): + err = CLBlastDsyr2k(CLBlastLayoutRowMajor, triangle, ab_transpose, n, k, alpha, a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, beta, c_buffer, c_offset, c_ld, &command_queue, &event) + elif dtype == np.dtype("complex64"): + err = CLBlastCsyr2k(CLBlastLayoutRowMajor, triangle, ab_transpose, n, k, cl_float2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, cl_float2(x=beta.real,y=beta.imag), c_buffer, c_offset, c_ld, &command_queue, &event) + 
elif dtype == np.dtype("complex128"): + err = CLBlastZsyr2k(CLBlastLayoutRowMajor, triangle, ab_transpose, n, k, cl_double2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, cl_double2(x=beta.real,y=beta.imag), c_buffer, c_offset, c_ld, &command_queue, &event) + else: + raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) + if err != CLBlastSuccess: + raise RuntimeError("PyCLBlast: 'CLBlastXsyr2k' failed: %s" % get_status_message(err)) + return cl.Event.from_int_ptr(event) + +#################################################################################################### +# Rank-2K update of a hermitian matrix: CHER2K/ZHER2K +#################################################################################################### + +cdef extern from "clblast_c.h": + CLBlastStatusCode CLBlastCher2k(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose ab_transpose, const size_t n, const size_t k, const cl_float2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const float beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastZher2k(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose ab_transpose, const size_t n, const size_t k, const cl_double2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const double beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event) + +def her2k(queue, n, k, a, b, c, a_ld, b_ld, c_ld, alpha = 1.0, beta = 0.0, lower_triangle = False, ab_transp = False, a_offset = 0, b_offset = 0, c_offset = 0): + dtype = check_dtype([a, b, c], ["complex64", "complex128"]) + check_matrix(a, "a") + check_matrix(b, "b") + check_matrix(c, "c") + + cdef cl_mem 
a_buffer = a.base_data.int_ptr + cdef cl_mem b_buffer = b.base_data.int_ptr + cdef cl_mem c_buffer = c.base_data.int_ptr + + cdef cl_command_queue command_queue = queue.int_ptr + cdef cl_event event = NULL + triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper + ab_transpose = CLBlastTransposeYes if ab_transp else CLBlastTransposeNo + + cdef CLBlastStatusCode err + if dtype == np.dtype("complex64"): + err = CLBlastCher2k(CLBlastLayoutRowMajor, triangle, ab_transpose, n, k, cl_float2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, beta, c_buffer, c_offset, c_ld, &command_queue, &event) + elif dtype == np.dtype("complex128"): + err = CLBlastZher2k(CLBlastLayoutRowMajor, triangle, ab_transpose, n, k, cl_double2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, beta, c_buffer, c_offset, c_ld, &command_queue, &event) + else: + raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) + if err != CLBlastSuccess: + raise RuntimeError("PyCLBlast: 'CLBlastXher2k' failed: %s" % get_status_message(err)) + return cl.Event.from_int_ptr(event) + +#################################################################################################### +# Triangular matrix-matrix multiplication: STRMM/DTRMM/CTRMM/ZTRMM/HTRMM +#################################################################################################### + +cdef extern from "clblast_c.h": + CLBlastStatusCode CLBlastStrmm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t m, const size_t n, const float alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem b_buffer, const size_t b_offset, const size_t b_ld,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastDtrmm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose 
a_transpose, const CLBlastDiagonal diagonal, const size_t m, const size_t n, const double alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem b_buffer, const size_t b_offset, const size_t b_ld,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastCtrmm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t m, const size_t n, const cl_float2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem b_buffer, const size_t b_offset, const size_t b_ld,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastZtrmm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t m, const size_t n, const cl_double2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem b_buffer, const size_t b_offset, const size_t b_ld,cl_command_queue* queue, cl_event* event) + +def trmm(queue, m, n, a, b, a_ld, b_ld, alpha = 1.0, right_side = False, lower_triangle = False, a_transp = False, unit_diagonal = False, a_offset = 0, b_offset = 0): + dtype = check_dtype([a, b], ["float32", "float64", "complex64", "complex128"]) + check_matrix(a, "a") + check_matrix(b, "b") + + cdef cl_mem a_buffer = a.base_data.int_ptr + cdef cl_mem b_buffer = b.base_data.int_ptr + + cdef cl_command_queue command_queue = queue.int_ptr + cdef cl_event event = NULL + side = CLBlastSideRight if right_side else CLBlastSideLeft + triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper + a_transpose = CLBlastTransposeYes if a_transp else CLBlastTransposeNo + diagonal = CLBlastDiagonalUnit if unit_diagonal else CLBlastDiagonalNonUnit + + cdef CLBlastStatusCode err + if dtype == np.dtype("float32"): + err = CLBlastStrmm(CLBlastLayoutRowMajor, side, triangle, a_transpose, diagonal, m, n, alpha, 
a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, &command_queue, &event) + elif dtype == np.dtype("float64"): + err = CLBlastDtrmm(CLBlastLayoutRowMajor, side, triangle, a_transpose, diagonal, m, n, alpha, a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, &command_queue, &event) + elif dtype == np.dtype("complex64"): + err = CLBlastCtrmm(CLBlastLayoutRowMajor, side, triangle, a_transpose, diagonal, m, n, cl_float2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, &command_queue, &event) + elif dtype == np.dtype("complex128"): + err = CLBlastZtrmm(CLBlastLayoutRowMajor, side, triangle, a_transpose, diagonal, m, n, cl_double2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, &command_queue, &event) + else: + raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) + if err != CLBlastSuccess: + raise RuntimeError("PyCLBlast: 'CLBlastXtrmm' failed: %s" % get_status_message(err)) + return cl.Event.from_int_ptr(event) + +#################################################################################################### +# Solves a triangular system of equations: STRSM/DTRSM/CTRSM/ZTRSM +#################################################################################################### + +cdef extern from "clblast_c.h": + CLBlastStatusCode CLBlastStrsm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t m, const size_t n, const float alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem b_buffer, const size_t b_offset, const size_t b_ld,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastDtrsm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t m, const size_t n, const double alpha, const cl_mem a_buffer, const size_t a_offset, const 
size_t a_ld, cl_mem b_buffer, const size_t b_offset, const size_t b_ld,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastCtrsm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t m, const size_t n, const cl_float2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem b_buffer, const size_t b_offset, const size_t b_ld,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastZtrsm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t m, const size_t n, const cl_double2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem b_buffer, const size_t b_offset, const size_t b_ld,cl_command_queue* queue, cl_event* event) + +def trsm(queue, m, n, a, b, a_ld, b_ld, alpha = 1.0, right_side = False, lower_triangle = False, a_transp = False, unit_diagonal = False, a_offset = 0, b_offset = 0): + dtype = check_dtype([a, b], ["float32", "float64", "complex64", "complex128"]) + check_matrix(a, "a") + check_matrix(b, "b") + + cdef cl_mem a_buffer = a.base_data.int_ptr + cdef cl_mem b_buffer = b.base_data.int_ptr + + cdef cl_command_queue command_queue = queue.int_ptr + cdef cl_event event = NULL + side = CLBlastSideRight if right_side else CLBlastSideLeft + triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper + a_transpose = CLBlastTransposeYes if a_transp else CLBlastTransposeNo + diagonal = CLBlastDiagonalUnit if unit_diagonal else CLBlastDiagonalNonUnit + + cdef CLBlastStatusCode err + if dtype == np.dtype("float32"): + err = CLBlastStrsm(CLBlastLayoutRowMajor, side, triangle, a_transpose, diagonal, m, n, alpha, a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, &command_queue, &event) + elif dtype == np.dtype("float64"): + err = 
CLBlastDtrsm(CLBlastLayoutRowMajor, side, triangle, a_transpose, diagonal, m, n, alpha, a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, &command_queue, &event) + elif dtype == np.dtype("complex64"): + err = CLBlastCtrsm(CLBlastLayoutRowMajor, side, triangle, a_transpose, diagonal, m, n, cl_float2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, &command_queue, &event) + elif dtype == np.dtype("complex128"): + err = CLBlastZtrsm(CLBlastLayoutRowMajor, side, triangle, a_transpose, diagonal, m, n, cl_double2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, &command_queue, &event) + else: + raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) + if err != CLBlastSuccess: + raise RuntimeError("PyCLBlast: 'CLBlastXtrsm' failed: %s" % get_status_message(err)) + return cl.Event.from_int_ptr(event) + +#################################################################################################### -- cgit v1.2.3 From 76c21a95c29bd3645213b870de61e012204cc844 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sun, 18 Feb 2018 17:59:43 +0100 Subject: Added PyCLBlast samples --- src/pyclblast/samples/saxpy.py | 36 ++++++++++++++++++++++++++++++++++++ src/pyclblast/samples/sgemm.py | 38 ++++++++++++++++++++++++++++++++++++++ src/pyclblast/samples/sgemv.py | 40 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 114 insertions(+) create mode 100644 src/pyclblast/samples/saxpy.py create mode 100644 src/pyclblast/samples/sgemm.py create mode 100644 src/pyclblast/samples/sgemv.py diff --git a/src/pyclblast/samples/saxpy.py b/src/pyclblast/samples/saxpy.py new file mode 100644 index 00000000..098e44d5 --- /dev/null +++ b/src/pyclblast/samples/saxpy.py @@ -0,0 +1,36 @@ +#!/usr/bin/env python + +# This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. +# This file follows the PEP8 Python style guide and uses a max-width of 100 characters per line. 
+#
+# Author(s):
+# Cedric Nugteren
+
+import numpy as np
+import pyopencl as cl
+from pyopencl.array import Array
+import pyclblast
+
+# Settings for this sample: element type, AXPY scaling factor and vector length
+dtype = 'float32'
+alpha = 1.5
+n = 4
+
+print("# Setting up OpenCL")
+ctx = cl.create_some_context()
+queue = cl.CommandQueue(ctx)
+
+print("# Setting up Numpy arrays")
+x = np.random.rand(n).astype(dtype=dtype)
+y = np.random.rand(n).astype(dtype=dtype)
+
+print("# Setting up OpenCL arrays")
+clx = Array(queue, x.shape, x.dtype)
+cly = Array(queue, y.shape, y.dtype)
+clx.set(x)  # copy the host data into the device arrays
+cly.set(y)
+
+print("# Example level-1 operation: AXPY")
+pyclblast.axpy(queue, n, clx, cly, alpha=alpha)  # result is read back from cly below
+print("# Result for vector y: %s" % cly.get())
+print("# Expected result: %s" % (alpha * x + y))  # host-side reference computed with Numpy
diff --git a/src/pyclblast/samples/sgemm.py b/src/pyclblast/samples/sgemm.py new file mode 100644 index 00000000..c872553f --- /dev/null +++ b/src/pyclblast/samples/sgemm.py @@ -0,0 +1,38 @@
+#!/usr/bin/env python
+
+# This file is part of the CLBlast project. The project is licensed under Apache Version 2.0.
+# This file follows the PEP8 Python style guide and uses a max-width of 100 characters per line.
+#
+# Author(s):
+# Cedric Nugteren
+
+import numpy as np
+import pyopencl as cl
+from pyopencl.array import Array
+import pyclblast
+
+# Settings for this sample: element type of all three matrices
+dtype = 'float32'
+
+print("# Setting up OpenCL")
+ctx = cl.create_some_context()
+queue = cl.CommandQueue(ctx)
+
+print("# Setting up Numpy arrays")
+m, n, k = 2, 3, 4  # a is (m x k), b is (k x n), c is (m x n)
+a = np.random.rand(m, k).astype(dtype=dtype)
+b = np.random.rand(k, n).astype(dtype=dtype)
+c = np.random.rand(m, n).astype(dtype=dtype)
+
+print("# Setting up OpenCL arrays")
+cla = Array(queue, a.shape, a.dtype)
+clb = Array(queue, b.shape, b.dtype)
+clc = Array(queue, c.shape, c.dtype)
+cla.set(a)  # copy the host data into the device arrays
+clb.set(b)
+clc.set(c)
+
+print("# Example level-3 operation: GEMM")
+pyclblast.gemm(queue, m, n, k, cla, clb, clc, a_ld=k, b_ld=n, c_ld=n)  # ld = row length
+print("# Matrix C result: %s" % clc.get())
+print("# Expected result: %s" % (np.dot(a, b)))  # NOTE(review): assumes gemm's default beta is 0
diff --git a/src/pyclblast/samples/sgemv.py b/src/pyclblast/samples/sgemv.py new file mode 100644 index 00000000..196c838d --- /dev/null +++ b/src/pyclblast/samples/sgemv.py @@ -0,0 +1,40 @@
+#!/usr/bin/env python
+
+# This file is part of the CLBlast project. The project is licensed under Apache Version 2.0.
+# This file follows the PEP8 Python style guide and uses a max-width of 100 characters per line.
+#
+# Author(s):
+# Cedric Nugteren
+
+import numpy as np
+import pyopencl as cl
+from pyopencl.array import Array
+import pyclblast
+
+# Settings for this sample: element type, matrix dimensions and the GEMV scalars
+dtype = 'float32'
+m, n = 4, 3  # a is (m x n), x has n elements, y has m elements
+alpha = 1.0
+beta = 0.0
+
+print("# Setting up OpenCL")
+ctx = cl.create_some_context()
+queue = cl.CommandQueue(ctx)
+
+print("# Setting up Numpy arrays")
+a = np.random.rand(m, n).astype(dtype=dtype)
+x = np.random.rand(n).astype(dtype=dtype)
+y = np.random.rand(m).astype(dtype=dtype)
+
+print("# Setting up OpenCL arrays")
+cla = Array(queue, a.shape, a.dtype)
+clx = Array(queue, x.shape, x.dtype)
+cly = Array(queue, y.shape, y.dtype)
+cla.set(a)  # copy the host data into the device arrays
+clx.set(x)
+cly.set(y)
+
+print("# Example level-2 operation: GEMV")
+pyclblast.gemv(queue, m, n, cla, clx, cly, a_ld=n, alpha=alpha, beta=beta)  # a_ld = row length
+print("# Result for vector y: %s" % cly.get())
+print("# Expected result: %s" % (alpha * np.dot(a, x) + beta * y))  # host-side reference
-- cgit v1.2.3 From ce5e2a1e008bee3512a9e17db4d6ded17d2141e6 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sun, 18 Feb 2018 18:01:02 +0100 Subject: Prepared PyCLBlast for release as a package on PyPi --- scripts/generator/generator.py | 2 +- src/pyclblast/MANIFEST.in | 2 + src/pyclblast/README.md | 31 + src/pyclblast/pyclblast/pyclblast.pyx | 1901 --------------------------------- src/pyclblast/setup.py | 17 +- src/pyclblast/src/pyclblast.pyx | 1901 +++++++++++++++++++++++++++++++++ 6 files changed, 1948 insertions(+), 1906 deletions(-) create mode 100644 src/pyclblast/MANIFEST.in create mode 100644 src/pyclblast/README.md delete mode 100644 src/pyclblast/pyclblast/pyclblast.pyx create mode 100644 src/pyclblast/src/pyclblast.pyx diff --git a/scripts/generator/generator.py b/scripts/generator/generator.py index c25d0e4f..8c071ab3 100755 --- a/scripts/generator/generator.py +++ b/scripts/generator/generator.py @@ -47,7 +47,7 @@ FILES = [ "/src/clblast_netlib_c.cpp", "/include/clblast_cuda.h", "/src/clblast_cuda.cpp", - "/src/pyclblast/pyclblast/pyclblast.pyx" +
"/src/pyclblast/src/pyclblast.pyx" ] HEADER_LINES = [123, 21, 126, 24, 29, 41, 29, 65, 32, 95, 21, 288] FOOTER_LINES = [41, 56, 27, 38, 6, 6, 6, 9, 2, 41, 55, 1] diff --git a/src/pyclblast/MANIFEST.in b/src/pyclblast/MANIFEST.in new file mode 100644 index 00000000..fb20923f --- /dev/null +++ b/src/pyclblast/MANIFEST.in @@ -0,0 +1,2 @@ +include README.md setup.py src/*.pyx +include samples/*.py diff --git a/src/pyclblast/README.md b/src/pyclblast/README.md new file mode 100644 index 00000000..be37af01 --- /dev/null +++ b/src/pyclblast/README.md @@ -0,0 +1,31 @@ + +PyCLBlast: Python wrappers for the tuned OpenCL BLAS library CLBlast +================ + +This Python package provides a straightforward wrapper for CLBlast based on PyOpenCL. CLBlast is a modern, lightweight, performant and tunable OpenCL BLAS library written in C++11. It is designed to leverage the full performance potential of a wide variety of OpenCL devices from different vendors, including desktop and laptop GPUs, embedded GPUs, and other accelerators. CLBlast implements BLAS routines: basic linear algebra subprograms operating on vectors and matrices. + +See [the CLBlast repository](https://github.com/CNugteren/CLBlast) and [the CLBlast website](https://cnugteren.github.io/clblast) for more information about CLBlast. + + +Prerequisites +------------- + +Non-Python requirements: + +* OpenCL +* [CLBlast](https://github.com/CNugteren/CLBlast) + +Python requirements: + +* Cython +* [PyOpenCL](https://github.com/pyopencl/pyopencl/) + + +Getting started +------------- + +After installing OpenCL and CLBlast, simply use pip to install PyCLBlast, e.g.: + + pip install --user pyclblast + +To start using the library, browse the [CLBlast](https://github.com/CNugteren/CLBlast) documentation or check out the PyCLBlast samples provided in the `samples` subfolder. 
diff --git a/src/pyclblast/pyclblast/pyclblast.pyx b/src/pyclblast/pyclblast/pyclblast.pyx deleted file mode 100644 index 9529400c..00000000 --- a/src/pyclblast/pyclblast/pyclblast.pyx +++ /dev/null @@ -1,1901 +0,0 @@ - -#################################################################################################### -# This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. -# -# Author(s): -# Cedric Nugteren -# -# This file defines the Python interface to CLBlast. It is inspired by: -# https://github.com/hunse/pyopencl_blas -# -#################################################################################################### - -import numpy as np -import pyopencl as cl -from pyopencl.array import Array - -from libcpp cimport bool - -#################################################################################################### -# CLBlast and OpenCL data-types -#################################################################################################### - -cdef extern from "clblast_c.h": - - # Status codes - ctypedef enum CLBlastStatusCode: - CLBlastSuccess - CLBlastOpenCLCompilerNotAvailable - CLBlastTempBufferAllocFailure - CLBlastOpenCLOutOfResources - CLBlastOpenCLOutOfHostMemory - CLBlastOpenCLBuildProgramFailure - CLBlastInvalidValue - CLBlastInvalidCommandQueue - CLBlastInvalidMemObject - CLBlastInvalidBinary - CLBlastInvalidBuildOptions - CLBlastInvalidProgram - CLBlastInvalidProgramExecutable - CLBlastInvalidKernelName - CLBlastInvalidKernelDefinition - CLBlastInvalidKernel - CLBlastInvalidArgIndex - CLBlastInvalidArgValue - CLBlastInvalidArgSize - CLBlastInvalidKernelArgs - CLBlastInvalidLocalNumDimensions - CLBlastInvalidLocalThreadsTotal - CLBlastInvalidLocalThreadsDim - CLBlastInvalidGlobalOffset - CLBlastInvalidEventWaitList - CLBlastInvalidEvent - CLBlastInvalidOperation - CLBlastInvalidBufferSize - CLBlastInvalidGlobalWorkSize - CLBlastNotImplemented - CLBlastInvalidMatrixA - 
CLBlastInvalidMatrixB - CLBlastInvalidMatrixC - CLBlastInvalidVectorX - CLBlastInvalidVectorY - CLBlastInvalidDimension - CLBlastInvalidLeadDimA - CLBlastInvalidLeadDimB - CLBlastInvalidLeadDimC - CLBlastInvalidIncrementX - CLBlastInvalidIncrementY - CLBlastInsufficientMemoryA - CLBlastInsufficientMemoryB - CLBlastInsufficientMemoryC - CLBlastInsufficientMemoryX - CLBlastInsufficientMemoryY - CLBlastInvalidBatchCount - CLBlastInvalidOverrideKernel - CLBlastMissingOverrideParameter - CLBlastInvalidLocalMemUsage - CLBlastNoHalfPrecision - CLBlastNoDoublePrecision - CLBlastInvalidVectorScalar - CLBlastInsufficientMemoryScalar - CLBlastDatabaseError - CLBlastUnknownError - CLBlastUnexpectedError - - # OpenCL data-types - ctypedef float cl_float - ctypedef double cl_double - ctypedef unsigned int cl_uint - ctypedef struct cl_float2: - cl_float x - cl_float y - ctypedef struct cl_double2: - cl_double x - cl_double y - - # OpenCL special data-types - struct _cl_mem: - pass - struct _cl_command_queue: - pass - struct _cl_event: - pass - ctypedef _cl_mem* cl_mem - ctypedef _cl_command_queue* cl_command_queue - ctypedef _cl_event* cl_event - - # Matrix layout and transpose types - ctypedef enum CLBlastLayout: - CLBlastLayoutRowMajor - CLBlastLayoutColMajor - ctypedef enum CLBlastTranspose: - CLBlastTransposeNo - CLBlastTransposeYes - CLBlastTransposeConjugate - ctypedef enum CLBlastTriangle: - CLBlastTriangleUpper - CLBlastTriangleLower - ctypedef enum CLBlastDiagonal: - CLBlastDiagonalNonUnit - CLBlastDiagonalUnit - ctypedef enum CLBlastSide: - CLBlastSideLeft - CLBlastSideRight - - # Precision enum - ctypedef enum CLBlastPrecision: - CLBlastPrecisionSingle - CLBlastPrecisionDouble - CLBlastPrecisionComplexSingle - CLBlastPrecisionComplexDouble - -# Translates status codes into readable messages -cdef get_status_message(CLBlastStatusCode status): - if status == CLBlastSuccess: - return "CLBlastSuccess" - if status == CLBlastOpenCLCompilerNotAvailable: - return 
"CLBlastOpenCLCompilerNotAvailable: CL_COMPILER_NOT_AVAILABLE" - if status == CLBlastTempBufferAllocFailure: - return "CLBlastTempBufferAllocFailure: CL_MEM_OBJECT_ALLOCATION_FAILURE" - if status == CLBlastOpenCLOutOfResources: - return "CLBlastOpenCLOutOfResources: CL_OUT_OF_RESOURCES" - if status == CLBlastOpenCLOutOfHostMemory: - return "CLBlastOpenCLOutOfHostMemory: CL_OUT_OF_HOST_MEMORY" - if status == CLBlastOpenCLBuildProgramFailure: - return "CLBlastOpenCLBuildProgramFailure: CL_BUILD_PROGRAM_FAILURE: OpenCL compilation error" - if status == CLBlastInvalidValue: - return "CLBlastInvalidValue: CL_INVALID_VALUE" - if status == CLBlastInvalidCommandQueue: - return "CLBlastInvalidCommandQueue: CL_INVALID_COMMAND_QUEUE" - if status == CLBlastInvalidMemObject: - return "CLBlastInvalidMemObject: CL_INVALID_MEM_OBJECT" - if status == CLBlastInvalidBinary: - return "CLBlastInvalidBinary: CL_INVALID_BINARY" - if status == CLBlastInvalidBuildOptions: - return "CLBlastInvalidBuildOptions: CL_INVALID_BUILD_OPTIONS" - if status == CLBlastInvalidProgram: - return "CLBlastInvalidProgram: CL_INVALID_PROGRAM" - if status == CLBlastInvalidProgramExecutable: - return "CLBlastInvalidProgramExecutable: CL_INVALID_PROGRAM_EXECUTABLE" - if status == CLBlastInvalidKernelName: - return "CLBlastInvalidKernelName: CL_INVALID_KERNEL_NAME" - if status == CLBlastInvalidKernelDefinition: - return "CLBlastInvalidKernelDefinition: CL_INVALID_KERNEL_DEFINITION" - if status == CLBlastInvalidKernel: - return "CLBlastInvalidKernel: CL_INVALID_KERNEL" - if status == CLBlastInvalidArgIndex: - return "CLBlastInvalidArgIndex: CL_INVALID_ARG_INDEX" - if status == CLBlastInvalidArgValue: - return "CLBlastInvalidArgValue: CL_INVALID_ARG_VALUE" - if status == CLBlastInvalidArgSize: - return "CLBlastInvalidArgSize: CL_INVALID_ARG_SIZE" - if status == CLBlastInvalidKernelArgs: - return "CLBlastInvalidKernelArgs: CL_INVALID_KERNEL_ARGS" - if status == CLBlastInvalidLocalNumDimensions: - return 
"CLBlastInvalidLocalNumDimensions: CL_INVALID_WORK_DIMENSION: Too many thread dimensions" - if status == CLBlastInvalidLocalThreadsTotal: - return "CLBlastInvalidLocalThreadsTotal: CL_INVALID_WORK_GROUP_SIZE: Too many threads in total" - if status == CLBlastInvalidLocalThreadsDim: - return "CLBlastInvalidLocalThreadsDim: CL_INVALID_WORK_ITEM_SIZE: ... or for a specific dimension" - if status == CLBlastInvalidGlobalOffset: - return "CLBlastInvalidGlobalOffset: CL_INVALID_GLOBAL_OFFSET" - if status == CLBlastInvalidEventWaitList: - return "CLBlastInvalidEventWaitList: CL_INVALID_EVENT_WAIT_LIST" - if status == CLBlastInvalidEvent: - return "CLBlastInvalidEvent: CL_INVALID_EVENT" - if status == CLBlastInvalidOperation: - return "CLBlastInvalidOperation: CL_INVALID_OPERATION" - if status == CLBlastInvalidBufferSize: - return "CLBlastInvalidBufferSize: CL_INVALID_BUFFER_SIZE" - if status == CLBlastInvalidGlobalWorkSize: - return "CLBlastInvalidGlobalWorkSize: CL_INVALID_GLOBAL_WORK_SIZE" - if status == CLBlastNotImplemented: - return "CLBlastNotImplemented: Routine or functionality not implemented yet" - if status == CLBlastInvalidMatrixA: - return "CLBlastInvalidMatrixA: Matrix A is not a valid OpenCL buffer" - if status == CLBlastInvalidMatrixB: - return "CLBlastInvalidMatrixB: Matrix B is not a valid OpenCL buffer" - if status == CLBlastInvalidMatrixC: - return "CLBlastInvalidMatrixC: Matrix C is not a valid OpenCL buffer" - if status == CLBlastInvalidVectorX: - return "CLBlastInvalidVectorX: Vector X is not a valid OpenCL buffer" - if status == CLBlastInvalidVectorY: - return "CLBlastInvalidVectorY: Vector Y is not a valid OpenCL buffer" - if status == CLBlastInvalidDimension: - return "CLBlastInvalidDimension: Dimensions M, N, and K have to be larger than zero" - if status == CLBlastInvalidLeadDimA: - return "CLBlastInvalidLeadDimA: LD of A is smaller than the matrix's first dimension" - if status == CLBlastInvalidLeadDimB: - return "CLBlastInvalidLeadDimB: LD of B 
is smaller than the matrix's first dimension" - if status == CLBlastInvalidLeadDimC: - return "CLBlastInvalidLeadDimC: LD of C is smaller than the matrix's first dimension" - if status == CLBlastInvalidIncrementX: - return "CLBlastInvalidIncrementX: Increment of vector X cannot be zero" - if status == CLBlastInvalidIncrementY: - return "CLBlastInvalidIncrementY: Increment of vector Y cannot be zero" - if status == CLBlastInsufficientMemoryA: - return "CLBlastInsufficientMemoryA: Matrix A's OpenCL buffer is too small" - if status == CLBlastInsufficientMemoryB: - return "CLBlastInsufficientMemoryB: Matrix B's OpenCL buffer is too small" - if status == CLBlastInsufficientMemoryC: - return "CLBlastInsufficientMemoryC: Matrix C's OpenCL buffer is too small" - if status == CLBlastInsufficientMemoryX: - return "CLBlastInsufficientMemoryX: Vector X's OpenCL buffer is too small" - if status == CLBlastInsufficientMemoryY: - return "CLBlastInsufficientMemoryY: Vector Y's OpenCL buffer is too small" - if status == CLBlastInvalidBatchCount: - return "CLBlastInvalidBatchCount: The batch count needs to be positive" - if status == CLBlastInvalidOverrideKernel: - return "CLBlastInvalidOverrideKernel: Trying to override parameters for an invalid kernel" - if status == CLBlastMissingOverrideParameter: - return "CLBlastMissingOverrideParameter: Missing override parameter(s) for the target kernel" - if status == CLBlastInvalidLocalMemUsage: - return "CLBlastInvalidLocalMemUsage: Not enough local memory available on this device" - if status == CLBlastNoHalfPrecision: - return "CLBlastNoHalfPrecision: Half precision (16-bits) not supported by the device" - if status == CLBlastNoDoublePrecision: - return "CLBlastNoDoublePrecision: Double precision (64-bits) not supported by the device" - if status == CLBlastInvalidVectorScalar: - return "CLBlastInvalidVectorScalar: The unit-sized vector is not a valid OpenCL buffer" - if status == CLBlastInsufficientMemoryScalar: - return 
"CLBlastInsufficientMemoryScalar: The unit-sized vector's OpenCL buffer is too small" - if status == CLBlastDatabaseError: - return "CLBlastDatabaseError: Entry for the device was not found in the database" - if status == CLBlastUnknownError: - return "CLBlastUnknownError: A catch-all error code representing an unspecified error" - if status == CLBlastUnexpectedError: - return "CLBlastUnexpectedError: A catch-all error code representing an unexpected exception" - return "PyCLBlast: unrecognized CLBlast status code (code %d)" % status - -#################################################################################################### -# Generic helpers -#################################################################################################### - -dtype_size = {np.dtype('float32'): 4, - np.dtype('float64'): 8, - np.dtype('complex64'): 8, - np.dtype('complex128'): 16} - -def dtypes_str(dtypes): - if len(dtypes) == 1: - return "'%s'" % dtypes[0] - return "one of %s" % dtypes - - -def check_dtype(args, dtypes): - dtype = args[0].dtype - if not all(arg.dtype == dtype for arg in args): - raise ValueError("PyCLBlast: All arguments must have the same dtype (%s)" % dtypes_str(dtypes)) - if dtype not in dtypes: - raise ValueError("PyCLBlast: Data type must be %s" % dtypes_str(dtypes)) - return dtype - - -def check_array(a, ndim, name): - if not isinstance(a, Array): - raise ValueError("PyCLBlast: '%s' must be a PyOpenCL Array" % name) - if not len(a.shape) == ndim: - raise ValueError("PyCLBlast: '%s' must have %d dimensions (got %d)" % (name, ndim, len(a.shape))) - - -def check_matrix(a, name): - check_array(a, 2, name) - - -def check_vector(a, name): - check_array(a, 1, name) - - -#################################################################################################### -# Swap two vectors: SSWAP/DSWAP/CSWAP/ZSWAP/HSWAP -#################################################################################################### - -cdef extern from 
"clblast_c.h": - CLBlastStatusCode CLBlastSswap(const size_t n, cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastDswap(const size_t n, cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastCswap(const size_t n, cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastZswap(const size_t n, cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) - -def swap(queue, n, x, y, x_inc = 1, y_inc = 1, x_offset = 0, y_offset = 0): - dtype = check_dtype([x, y], ["float32", "float64", "complex64", "complex128"]) - check_vector(x, "x") - check_vector(y, "y") - - cdef cl_mem x_buffer = x.base_data.int_ptr - cdef cl_mem y_buffer = y.base_data.int_ptr - - cdef cl_command_queue command_queue = queue.int_ptr - cdef cl_event event = NULL - - cdef CLBlastStatusCode err - if dtype == np.dtype("float32"): - err = CLBlastSswap(n, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event) - elif dtype == np.dtype("float64"): - err = CLBlastDswap(n, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event) - elif dtype == np.dtype("complex64"): - err = CLBlastCswap(n, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event) - elif dtype == np.dtype("complex128"): - err = CLBlastZswap(n, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event) - else: - raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) - if err != CLBlastSuccess: - raise RuntimeError("PyCLBlast: 'CLBlastXswap' failed: %s" % 
get_status_message(err)) - return cl.Event.from_int_ptr(event) - -#################################################################################################### -# Vector scaling: SSCAL/DSCAL/CSCAL/ZSCAL/HSCAL -#################################################################################################### - -cdef extern from "clblast_c.h": - CLBlastStatusCode CLBlastSscal(const size_t n, const float alpha, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastDscal(const size_t n, const double alpha, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastCscal(const size_t n, const cl_float2 alpha, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastZscal(const size_t n, const cl_double2 alpha, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) - -def scal(queue, n, x, x_inc = 1, alpha = 1.0, x_offset = 0): - dtype = check_dtype([x], ["float32", "float64", "complex64", "complex128"]) - check_vector(x, "x") - - cdef cl_mem x_buffer = x.base_data.int_ptr - - cdef cl_command_queue command_queue = queue.int_ptr - cdef cl_event event = NULL - - cdef CLBlastStatusCode err - if dtype == np.dtype("float32"): - err = CLBlastSscal(n, alpha, x_buffer, x_offset, x_inc, &command_queue, &event) - elif dtype == np.dtype("float64"): - err = CLBlastDscal(n, alpha, x_buffer, x_offset, x_inc, &command_queue, &event) - elif dtype == np.dtype("complex64"): - err = CLBlastCscal(n, cl_float2(x=alpha.real,y=alpha.imag), x_buffer, x_offset, x_inc, &command_queue, &event) - elif dtype == np.dtype("complex128"): - err = CLBlastZscal(n, cl_double2(x=alpha.real,y=alpha.imag), x_buffer, x_offset, x_inc, &command_queue, &event) - else: - raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) - if err != 
CLBlastSuccess: - raise RuntimeError("PyCLBlast: 'CLBlastXscal' failed: %s" % get_status_message(err)) - return cl.Event.from_int_ptr(event) - -#################################################################################################### -# Vector copy: SCOPY/DCOPY/CCOPY/ZCOPY/HCOPY -#################################################################################################### - -cdef extern from "clblast_c.h": - CLBlastStatusCode CLBlastScopy(const size_t n, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastDcopy(const size_t n, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastCcopy(const size_t n, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastZcopy(const size_t n, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) - -def copy(queue, n, x, y, x_inc = 1, y_inc = 1, x_offset = 0, y_offset = 0): - dtype = check_dtype([x, y], ["float32", "float64", "complex64", "complex128"]) - check_vector(x, "x") - check_vector(y, "y") - - cdef cl_mem x_buffer = x.base_data.int_ptr - cdef cl_mem y_buffer = y.base_data.int_ptr - - cdef cl_command_queue command_queue = queue.int_ptr - cdef cl_event event = NULL - - cdef CLBlastStatusCode err - if dtype == np.dtype("float32"): - err = CLBlastScopy(n, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event) - elif dtype == np.dtype("float64"): - err = CLBlastDcopy(n, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event) - elif dtype == 
np.dtype("complex64"): - err = CLBlastCcopy(n, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event) - elif dtype == np.dtype("complex128"): - err = CLBlastZcopy(n, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event) - else: - raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) - if err != CLBlastSuccess: - raise RuntimeError("PyCLBlast: 'CLBlastXcopy' failed: %s" % get_status_message(err)) - return cl.Event.from_int_ptr(event) - -#################################################################################################### -# Vector-times-constant plus vector: SAXPY/DAXPY/CAXPY/ZAXPY/HAXPY -#################################################################################################### - -cdef extern from "clblast_c.h": - CLBlastStatusCode CLBlastSaxpy(const size_t n, const float alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastDaxpy(const size_t n, const double alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastCaxpy(const size_t n, const cl_float2 alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastZaxpy(const size_t n, const cl_double2 alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) - -def axpy(queue, n, x, y, x_inc = 1, y_inc = 1, alpha = 1.0, x_offset = 0, y_offset = 0): - dtype = check_dtype([x, y], ["float32", "float64", "complex64", "complex128"]) - check_vector(x, "x") - check_vector(y, "y") - - cdef cl_mem x_buffer 
= x.base_data.int_ptr - cdef cl_mem y_buffer = y.base_data.int_ptr - - cdef cl_command_queue command_queue = queue.int_ptr - cdef cl_event event = NULL - - cdef CLBlastStatusCode err - if dtype == np.dtype("float32"): - err = CLBlastSaxpy(n, alpha, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event) - elif dtype == np.dtype("float64"): - err = CLBlastDaxpy(n, alpha, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event) - elif dtype == np.dtype("complex64"): - err = CLBlastCaxpy(n, cl_float2(x=alpha.real,y=alpha.imag), x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event) - elif dtype == np.dtype("complex128"): - err = CLBlastZaxpy(n, cl_double2(x=alpha.real,y=alpha.imag), x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event) - else: - raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) - if err != CLBlastSuccess: - raise RuntimeError("PyCLBlast: 'CLBlastXaxpy' failed: %s" % get_status_message(err)) - return cl.Event.from_int_ptr(event) - -#################################################################################################### -# Dot product of two vectors: SDOT/DDOT/HDOT -#################################################################################################### - -cdef extern from "clblast_c.h": - CLBlastStatusCode CLBlastSdot(const size_t n, cl_mem dot_buffer, const size_t dot_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastDdot(const size_t n, cl_mem dot_buffer, const size_t dot_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) - -def dot(queue, n, x, y, dot, x_inc = 1, y_inc = 1, x_offset = 0, y_offset = 0, dot_offset = 0): - dtype = 
check_dtype([x, y, dot], ["float32", "float64"]) - check_vector(x, "x") - check_vector(y, "y") - check_matrix(dot, "dot") - - cdef cl_mem x_buffer = x.base_data.int_ptr - cdef cl_mem y_buffer = y.base_data.int_ptr - cdef cl_mem dot_buffer = dot.base_data.int_ptr - - cdef cl_command_queue command_queue = queue.int_ptr - cdef cl_event event = NULL - - cdef CLBlastStatusCode err - if dtype == np.dtype("float32"): - err = CLBlastSdot(n, dot_buffer, dot_offset, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event) - elif dtype == np.dtype("float64"): - err = CLBlastDdot(n, dot_buffer, dot_offset, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event) - else: - raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) - if err != CLBlastSuccess: - raise RuntimeError("PyCLBlast: 'CLBlastXdot' failed: %s" % get_status_message(err)) - return cl.Event.from_int_ptr(event) - -#################################################################################################### -# Dot product of two complex vectors: CDOTU/ZDOTU -#################################################################################################### - -cdef extern from "clblast_c.h": - CLBlastStatusCode CLBlastCdotu(const size_t n, cl_mem dot_buffer, const size_t dot_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastZdotu(const size_t n, cl_mem dot_buffer, const size_t dot_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) - -def dotu(queue, n, x, y, dot, x_inc = 1, y_inc = 1, x_offset = 0, y_offset = 0, dot_offset = 0): - dtype = check_dtype([x, y, dot], ["complex64", "complex128"]) - check_vector(x, "x") - check_vector(y, "y") - check_matrix(dot, "dot") - - cdef 
cl_mem x_buffer = x.base_data.int_ptr - cdef cl_mem y_buffer = y.base_data.int_ptr - cdef cl_mem dot_buffer = dot.base_data.int_ptr - - cdef cl_command_queue command_queue = queue.int_ptr - cdef cl_event event = NULL - - cdef CLBlastStatusCode err - if dtype == np.dtype("complex64"): - err = CLBlastCdotu(n, dot_buffer, dot_offset, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event) - elif dtype == np.dtype("complex128"): - err = CLBlastZdotu(n, dot_buffer, dot_offset, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event) - else: - raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) - if err != CLBlastSuccess: - raise RuntimeError("PyCLBlast: 'CLBlastXdotu' failed: %s" % get_status_message(err)) - return cl.Event.from_int_ptr(event) - -#################################################################################################### -# Dot product of two complex vectors, one conjugated: CDOTC/ZDOTC -#################################################################################################### - -cdef extern from "clblast_c.h": - CLBlastStatusCode CLBlastCdotc(const size_t n, cl_mem dot_buffer, const size_t dot_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastZdotc(const size_t n, cl_mem dot_buffer, const size_t dot_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) - -def dotc(queue, n, x, y, dot, x_inc = 1, y_inc = 1, x_offset = 0, y_offset = 0, dot_offset = 0): - dtype = check_dtype([x, y, dot], ["complex64", "complex128"]) - check_vector(x, "x") - check_vector(y, "y") - check_matrix(dot, "dot") - - cdef cl_mem x_buffer = x.base_data.int_ptr - cdef cl_mem y_buffer = y.base_data.int_ptr - cdef cl_mem 
dot_buffer = dot.base_data.int_ptr - - cdef cl_command_queue command_queue = queue.int_ptr - cdef cl_event event = NULL - - cdef CLBlastStatusCode err - if dtype == np.dtype("complex64"): - err = CLBlastCdotc(n, dot_buffer, dot_offset, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event) - elif dtype == np.dtype("complex128"): - err = CLBlastZdotc(n, dot_buffer, dot_offset, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event) - else: - raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) - if err != CLBlastSuccess: - raise RuntimeError("PyCLBlast: 'CLBlastXdotc' failed: %s" % get_status_message(err)) - return cl.Event.from_int_ptr(event) - -#################################################################################################### -# Euclidian norm of a vector: SNRM2/DNRM2/ScNRM2/DzNRM2/HNRM2 -#################################################################################################### - -cdef extern from "clblast_c.h": - CLBlastStatusCode CLBlastSnrm2(const size_t n, cl_mem nrm2_buffer, const size_t nrm2_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastDnrm2(const size_t n, cl_mem nrm2_buffer, const size_t nrm2_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastScnrm2(const size_t n, cl_mem nrm2_buffer, const size_t nrm2_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastDznrm2(const size_t n, cl_mem nrm2_buffer, const size_t nrm2_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) - -def nrm2(queue, n, x, nrm2, x_inc = 1, x_offset = 0, nrm2_offset = 0): - dtype = check_dtype([x, nrm2], ["float32", "float64", "complex64", "complex128"]) - check_vector(x, 
"x") - check_matrix(nrm2, "nrm2") - - cdef cl_mem x_buffer = x.base_data.int_ptr - cdef cl_mem nrm2_buffer = nrm2.base_data.int_ptr - - cdef cl_command_queue command_queue = queue.int_ptr - cdef cl_event event = NULL - - cdef CLBlastStatusCode err - if dtype == np.dtype("float32"): - err = CLBlastSnrm2(n, nrm2_buffer, nrm2_offset, x_buffer, x_offset, x_inc, &command_queue, &event) - elif dtype == np.dtype("float64"): - err = CLBlastDnrm2(n, nrm2_buffer, nrm2_offset, x_buffer, x_offset, x_inc, &command_queue, &event) - elif dtype == np.dtype("complex64"): - err = CLBlastScnrm2(n, nrm2_buffer, nrm2_offset, x_buffer, x_offset, x_inc, &command_queue, &event) - elif dtype == np.dtype("complex128"): - err = CLBlastDznrm2(n, nrm2_buffer, nrm2_offset, x_buffer, x_offset, x_inc, &command_queue, &event) - else: - raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) - if err != CLBlastSuccess: - raise RuntimeError("PyCLBlast: 'CLBlastXnrm2' failed: %s" % get_status_message(err)) - return cl.Event.from_int_ptr(event) - -#################################################################################################### -# Absolute sum of values in a vector: SASUM/DASUM/ScASUM/DzASUM/HASUM -#################################################################################################### - -cdef extern from "clblast_c.h": - CLBlastStatusCode CLBlastSasum(const size_t n, cl_mem asum_buffer, const size_t asum_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastDasum(const size_t n, cl_mem asum_buffer, const size_t asum_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastScasum(const size_t n, cl_mem asum_buffer, const size_t asum_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastDzasum(const 
size_t n, cl_mem asum_buffer, const size_t asum_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) - -def asum(queue, n, x, asum, x_inc = 1, x_offset = 0, asum_offset = 0): - dtype = check_dtype([x, asum], ["float32", "float64", "complex64", "complex128"]) - check_vector(x, "x") - check_matrix(asum, "asum") - - cdef cl_mem x_buffer = x.base_data.int_ptr - cdef cl_mem asum_buffer = asum.base_data.int_ptr - - cdef cl_command_queue command_queue = queue.int_ptr - cdef cl_event event = NULL - - cdef CLBlastStatusCode err - if dtype == np.dtype("float32"): - err = CLBlastSasum(n, asum_buffer, asum_offset, x_buffer, x_offset, x_inc, &command_queue, &event) - elif dtype == np.dtype("float64"): - err = CLBlastDasum(n, asum_buffer, asum_offset, x_buffer, x_offset, x_inc, &command_queue, &event) - elif dtype == np.dtype("complex64"): - err = CLBlastScasum(n, asum_buffer, asum_offset, x_buffer, x_offset, x_inc, &command_queue, &event) - elif dtype == np.dtype("complex128"): - err = CLBlastDzasum(n, asum_buffer, asum_offset, x_buffer, x_offset, x_inc, &command_queue, &event) - else: - raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) - if err != CLBlastSuccess: - raise RuntimeError("PyCLBlast: 'CLBlastXasum' failed: %s" % get_status_message(err)) - return cl.Event.from_int_ptr(event) - -#################################################################################################### -# Sum of values in a vector (non-BLAS function): SSUM/DSUM/ScSUM/DzSUM/HSUM -#################################################################################################### - -cdef extern from "clblast_c.h": - CLBlastStatusCode CLBlastSsum(const size_t n, cl_mem sum_buffer, const size_t sum_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastDsum(const size_t n, cl_mem sum_buffer, const size_t sum_offset, const cl_mem 
x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastScsum(const size_t n, cl_mem sum_buffer, const size_t sum_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastDzsum(const size_t n, cl_mem sum_buffer, const size_t sum_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) - -def sum(queue, n, x, sum, x_inc = 1, x_offset = 0, sum_offset = 0): - dtype = check_dtype([x, sum], ["float32", "float64", "complex64", "complex128"]) - check_vector(x, "x") - check_matrix(sum, "sum") - - cdef cl_mem x_buffer = x.base_data.int_ptr - cdef cl_mem sum_buffer = sum.base_data.int_ptr - - cdef cl_command_queue command_queue = queue.int_ptr - cdef cl_event event = NULL - - cdef CLBlastStatusCode err - if dtype == np.dtype("float32"): - err = CLBlastSsum(n, sum_buffer, sum_offset, x_buffer, x_offset, x_inc, &command_queue, &event) - elif dtype == np.dtype("float64"): - err = CLBlastDsum(n, sum_buffer, sum_offset, x_buffer, x_offset, x_inc, &command_queue, &event) - elif dtype == np.dtype("complex64"): - err = CLBlastScsum(n, sum_buffer, sum_offset, x_buffer, x_offset, x_inc, &command_queue, &event) - elif dtype == np.dtype("complex128"): - err = CLBlastDzsum(n, sum_buffer, sum_offset, x_buffer, x_offset, x_inc, &command_queue, &event) - else: - raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) - if err != CLBlastSuccess: - raise RuntimeError("PyCLBlast: 'CLBlastXsum' failed: %s" % get_status_message(err)) - return cl.Event.from_int_ptr(event) - -#################################################################################################### -# Index of absolute maximum value in a vector: iSAMAX/iDAMAX/iCAMAX/iZAMAX/iHAMAX -#################################################################################################### - -cdef extern from 
"clblast_c.h": - CLBlastStatusCode CLBlastiSamax(const size_t n, cl_mem imax_buffer, const size_t imax_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastiDamax(const size_t n, cl_mem imax_buffer, const size_t imax_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastiCamax(const size_t n, cl_mem imax_buffer, const size_t imax_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastiZamax(const size_t n, cl_mem imax_buffer, const size_t imax_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) - -def amax(queue, n, x, imax, x_inc = 1, x_offset = 0, imax_offset = 0): - dtype = check_dtype([x, imax], ["float32", "float64", "complex64", "complex128"]) - check_vector(x, "x") - check_matrix(imax, "imax") - - cdef cl_mem x_buffer = x.base_data.int_ptr - cdef cl_mem imax_buffer = imax.base_data.int_ptr - - cdef cl_command_queue command_queue = queue.int_ptr - cdef cl_event event = NULL - - cdef CLBlastStatusCode err - if dtype == np.dtype("float32"): - err = CLBlastiSamax(n, imax_buffer, imax_offset, x_buffer, x_offset, x_inc, &command_queue, &event) - elif dtype == np.dtype("float64"): - err = CLBlastiDamax(n, imax_buffer, imax_offset, x_buffer, x_offset, x_inc, &command_queue, &event) - elif dtype == np.dtype("complex64"): - err = CLBlastiCamax(n, imax_buffer, imax_offset, x_buffer, x_offset, x_inc, &command_queue, &event) - elif dtype == np.dtype("complex128"): - err = CLBlastiZamax(n, imax_buffer, imax_offset, x_buffer, x_offset, x_inc, &command_queue, &event) - else: - raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) - if err != CLBlastSuccess: - raise RuntimeError("PyCLBlast: 'CLBlastXamax' failed: %s" % 
get_status_message(err)) - return cl.Event.from_int_ptr(event) - -#################################################################################################### -# Index of absolute minimum value in a vector (non-BLAS function): iSAMIN/iDAMIN/iCAMIN/iZAMIN/iHAMIN -#################################################################################################### - -cdef extern from "clblast_c.h": - CLBlastStatusCode CLBlastiSamin(const size_t n, cl_mem imin_buffer, const size_t imin_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastiDamin(const size_t n, cl_mem imin_buffer, const size_t imin_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastiCamin(const size_t n, cl_mem imin_buffer, const size_t imin_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastiZamin(const size_t n, cl_mem imin_buffer, const size_t imin_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) - -def amin(queue, n, x, imin, x_inc = 1, x_offset = 0, imin_offset = 0): - dtype = check_dtype([x, imin], ["float32", "float64", "complex64", "complex128"]) - check_vector(x, "x") - check_matrix(imin, "imin") - - cdef cl_mem x_buffer = x.base_data.int_ptr - cdef cl_mem imin_buffer = imin.base_data.int_ptr - - cdef cl_command_queue command_queue = queue.int_ptr - cdef cl_event event = NULL - - cdef CLBlastStatusCode err - if dtype == np.dtype("float32"): - err = CLBlastiSamin(n, imin_buffer, imin_offset, x_buffer, x_offset, x_inc, &command_queue, &event) - elif dtype == np.dtype("float64"): - err = CLBlastiDamin(n, imin_buffer, imin_offset, x_buffer, x_offset, x_inc, &command_queue, &event) - elif dtype == np.dtype("complex64"): - err = CLBlastiCamin(n, imin_buffer, 
imin_offset, x_buffer, x_offset, x_inc, &command_queue, &event) - elif dtype == np.dtype("complex128"): - err = CLBlastiZamin(n, imin_buffer, imin_offset, x_buffer, x_offset, x_inc, &command_queue, &event) - else: - raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) - if err != CLBlastSuccess: - raise RuntimeError("PyCLBlast: 'CLBlastXamin' failed: %s" % get_status_message(err)) - return cl.Event.from_int_ptr(event) - -#################################################################################################### -# Index of maximum value in a vector (non-BLAS function): iSMAX/iDMAX/iCMAX/iZMAX/iHMAX -#################################################################################################### - -cdef extern from "clblast_c.h": - CLBlastStatusCode CLBlastiSmax(const size_t n, cl_mem imax_buffer, const size_t imax_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastiDmax(const size_t n, cl_mem imax_buffer, const size_t imax_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastiCmax(const size_t n, cl_mem imax_buffer, const size_t imax_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastiZmax(const size_t n, cl_mem imax_buffer, const size_t imax_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) - -def max(queue, n, x, imax, x_inc = 1, x_offset = 0, imax_offset = 0): - dtype = check_dtype([x, imax], ["float32", "float64", "complex64", "complex128"]) - check_vector(x, "x") - check_matrix(imax, "imax") - - cdef cl_mem x_buffer = x.base_data.int_ptr - cdef cl_mem imax_buffer = imax.base_data.int_ptr - - cdef cl_command_queue command_queue = queue.int_ptr - cdef cl_event event = NULL - - cdef CLBlastStatusCode err - 
if dtype == np.dtype("float32"): - err = CLBlastiSmax(n, imax_buffer, imax_offset, x_buffer, x_offset, x_inc, &command_queue, &event) - elif dtype == np.dtype("float64"): - err = CLBlastiDmax(n, imax_buffer, imax_offset, x_buffer, x_offset, x_inc, &command_queue, &event) - elif dtype == np.dtype("complex64"): - err = CLBlastiCmax(n, imax_buffer, imax_offset, x_buffer, x_offset, x_inc, &command_queue, &event) - elif dtype == np.dtype("complex128"): - err = CLBlastiZmax(n, imax_buffer, imax_offset, x_buffer, x_offset, x_inc, &command_queue, &event) - else: - raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) - if err != CLBlastSuccess: - raise RuntimeError("PyCLBlast: 'CLBlastXmax' failed: %s" % get_status_message(err)) - return cl.Event.from_int_ptr(event) - -#################################################################################################### -# Index of minimum value in a vector (non-BLAS function): iSMIN/iDMIN/iCMIN/iZMIN/iHMIN -#################################################################################################### - -cdef extern from "clblast_c.h": - CLBlastStatusCode CLBlastiSmin(const size_t n, cl_mem imin_buffer, const size_t imin_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastiDmin(const size_t n, cl_mem imin_buffer, const size_t imin_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastiCmin(const size_t n, cl_mem imin_buffer, const size_t imin_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastiZmin(const size_t n, cl_mem imin_buffer, const size_t imin_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) - -def min(queue, n, x, imin, x_inc = 1, x_offset = 0, imin_offset = 0): - 
dtype = check_dtype([x, imin], ["float32", "float64", "complex64", "complex128"]) - check_vector(x, "x") - check_matrix(imin, "imin") - - cdef cl_mem x_buffer = x.base_data.int_ptr - cdef cl_mem imin_buffer = imin.base_data.int_ptr - - cdef cl_command_queue command_queue = queue.int_ptr - cdef cl_event event = NULL - - cdef CLBlastStatusCode err - if dtype == np.dtype("float32"): - err = CLBlastiSmin(n, imin_buffer, imin_offset, x_buffer, x_offset, x_inc, &command_queue, &event) - elif dtype == np.dtype("float64"): - err = CLBlastiDmin(n, imin_buffer, imin_offset, x_buffer, x_offset, x_inc, &command_queue, &event) - elif dtype == np.dtype("complex64"): - err = CLBlastiCmin(n, imin_buffer, imin_offset, x_buffer, x_offset, x_inc, &command_queue, &event) - elif dtype == np.dtype("complex128"): - err = CLBlastiZmin(n, imin_buffer, imin_offset, x_buffer, x_offset, x_inc, &command_queue, &event) - else: - raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) - if err != CLBlastSuccess: - raise RuntimeError("PyCLBlast: 'CLBlastXmin' failed: %s" % get_status_message(err)) - return cl.Event.from_int_ptr(event) - -#################################################################################################### -# General matrix-vector multiplication: SGEMV/DGEMV/CGEMV/ZGEMV/HGEMV -#################################################################################################### - -cdef extern from "clblast_c.h": - CLBlastStatusCode CLBlastSgemv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const size_t m, const size_t n, const float alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const float beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastDgemv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const size_t m, const size_t n, const double alpha, const 
cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const double beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastCgemv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const size_t m, const size_t n, const cl_float2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_float2 beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastZgemv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const size_t m, const size_t n, const cl_double2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_double2 beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) - -def gemv(queue, m, n, a, x, y, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, beta = 0.0, a_transp = False, a_offset = 0, x_offset = 0, y_offset = 0): - dtype = check_dtype([a, x, y], ["float32", "float64", "complex64", "complex128"]) - check_matrix(a, "a") - check_vector(x, "x") - check_vector(y, "y") - - cdef cl_mem a_buffer = a.base_data.int_ptr - cdef cl_mem x_buffer = x.base_data.int_ptr - cdef cl_mem y_buffer = y.base_data.int_ptr - - cdef cl_command_queue command_queue = queue.int_ptr - cdef cl_event event = NULL - a_transpose = CLBlastTransposeYes if a_transp else CLBlastTransposeNo - - cdef CLBlastStatusCode err - if dtype == np.dtype("float32"): - err = CLBlastSgemv(CLBlastLayoutRowMajor, a_transpose, m, n, alpha, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, beta, y_buffer, y_offset, y_inc, &command_queue, &event) - elif dtype == np.dtype("float64"): - err = CLBlastDgemv(CLBlastLayoutRowMajor, a_transpose, m, n, 
alpha, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, beta, y_buffer, y_offset, y_inc, &command_queue, &event) - elif dtype == np.dtype("complex64"): - err = CLBlastCgemv(CLBlastLayoutRowMajor, a_transpose, m, n, cl_float2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, cl_float2(x=beta.real,y=beta.imag), y_buffer, y_offset, y_inc, &command_queue, &event) - elif dtype == np.dtype("complex128"): - err = CLBlastZgemv(CLBlastLayoutRowMajor, a_transpose, m, n, cl_double2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, cl_double2(x=beta.real,y=beta.imag), y_buffer, y_offset, y_inc, &command_queue, &event) - else: - raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) - if err != CLBlastSuccess: - raise RuntimeError("PyCLBlast: 'CLBlastXgemv' failed: %s" % get_status_message(err)) - return cl.Event.from_int_ptr(event) - -#################################################################################################### -# General banded matrix-vector multiplication: SGBMV/DGBMV/CGBMV/ZGBMV/HGBMV -#################################################################################################### - -cdef extern from "clblast_c.h": - CLBlastStatusCode CLBlastSgbmv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const size_t m, const size_t n, const size_t kl, const size_t ku, const float alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const float beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastDgbmv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const size_t m, const size_t n, const size_t kl, const size_t ku, const double alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const double beta, 
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastCgbmv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const size_t m, const size_t n, const size_t kl, const size_t ku, const cl_float2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_float2 beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastZgbmv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const size_t m, const size_t n, const size_t kl, const size_t ku, const cl_double2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_double2 beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) - -def gbmv(queue, m, n, kl, ku, a, x, y, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, beta = 0.0, a_transp = False, a_offset = 0, x_offset = 0, y_offset = 0): - dtype = check_dtype([a, x, y], ["float32", "float64", "complex64", "complex128"]) - check_matrix(a, "a") - check_vector(x, "x") - check_vector(y, "y") - - cdef cl_mem a_buffer = a.base_data.int_ptr - cdef cl_mem x_buffer = x.base_data.int_ptr - cdef cl_mem y_buffer = y.base_data.int_ptr - - cdef cl_command_queue command_queue = queue.int_ptr - cdef cl_event event = NULL - a_transpose = CLBlastTransposeYes if a_transp else CLBlastTransposeNo - - cdef CLBlastStatusCode err - if dtype == np.dtype("float32"): - err = CLBlastSgbmv(CLBlastLayoutRowMajor, a_transpose, m, n, kl, ku, alpha, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, beta, y_buffer, y_offset, y_inc, &command_queue, &event) - elif dtype == np.dtype("float64"): - err = CLBlastDgbmv(CLBlastLayoutRowMajor, a_transpose, m, n, kl, ku, alpha, a_buffer, a_offset, a_ld, x_buffer, 
x_offset, x_inc, beta, y_buffer, y_offset, y_inc, &command_queue, &event) - elif dtype == np.dtype("complex64"): - err = CLBlastCgbmv(CLBlastLayoutRowMajor, a_transpose, m, n, kl, ku, cl_float2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, cl_float2(x=beta.real,y=beta.imag), y_buffer, y_offset, y_inc, &command_queue, &event) - elif dtype == np.dtype("complex128"): - err = CLBlastZgbmv(CLBlastLayoutRowMajor, a_transpose, m, n, kl, ku, cl_double2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, cl_double2(x=beta.real,y=beta.imag), y_buffer, y_offset, y_inc, &command_queue, &event) - else: - raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) - if err != CLBlastSuccess: - raise RuntimeError("PyCLBlast: 'CLBlastXgbmv' failed: %s" % get_status_message(err)) - return cl.Event.from_int_ptr(event) - -#################################################################################################### -# Hermitian matrix-vector multiplication: CHEMV/ZHEMV -#################################################################################################### - -cdef extern from "clblast_c.h": - CLBlastStatusCode CLBlastChemv(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const cl_float2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_float2 beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastZhemv(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const cl_double2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_double2 beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) - -def hemv(queue, n, a, x, y, a_ld, x_inc = 
1, y_inc = 1, alpha = 1.0, beta = 0.0, lower_triangle = False, a_offset = 0, x_offset = 0, y_offset = 0): - dtype = check_dtype([a, x, y], ["complex64", "complex128"]) - check_matrix(a, "a") - check_vector(x, "x") - check_vector(y, "y") - - cdef cl_mem a_buffer = a.base_data.int_ptr - cdef cl_mem x_buffer = x.base_data.int_ptr - cdef cl_mem y_buffer = y.base_data.int_ptr - - cdef cl_command_queue command_queue = queue.int_ptr - cdef cl_event event = NULL - triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper - - cdef CLBlastStatusCode err - if dtype == np.dtype("complex64"): - err = CLBlastChemv(CLBlastLayoutRowMajor, triangle, n, cl_float2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, cl_float2(x=beta.real,y=beta.imag), y_buffer, y_offset, y_inc, &command_queue, &event) - elif dtype == np.dtype("complex128"): - err = CLBlastZhemv(CLBlastLayoutRowMajor, triangle, n, cl_double2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, cl_double2(x=beta.real,y=beta.imag), y_buffer, y_offset, y_inc, &command_queue, &event) - else: - raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) - if err != CLBlastSuccess: - raise RuntimeError("PyCLBlast: 'CLBlastXhemv' failed: %s" % get_status_message(err)) - return cl.Event.from_int_ptr(event) - -#################################################################################################### -# Hermitian banded matrix-vector multiplication: CHBMV/ZHBMV -#################################################################################################### - -cdef extern from "clblast_c.h": - CLBlastStatusCode CLBlastChbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const size_t k, const cl_float2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_float2 beta, cl_mem y_buffer, const size_t y_offset, const 
size_t y_inc,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastZhbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const size_t k, const cl_double2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_double2 beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) - -def hbmv(queue, n, k, a, x, y, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, beta = 0.0, lower_triangle = False, a_offset = 0, x_offset = 0, y_offset = 0): - dtype = check_dtype([a, x, y], ["complex64", "complex128"]) - check_matrix(a, "a") - check_vector(x, "x") - check_vector(y, "y") - - cdef cl_mem a_buffer = a.base_data.int_ptr - cdef cl_mem x_buffer = x.base_data.int_ptr - cdef cl_mem y_buffer = y.base_data.int_ptr - - cdef cl_command_queue command_queue = queue.int_ptr - cdef cl_event event = NULL - triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper - - cdef CLBlastStatusCode err - if dtype == np.dtype("complex64"): - err = CLBlastChbmv(CLBlastLayoutRowMajor, triangle, n, k, cl_float2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, cl_float2(x=beta.real,y=beta.imag), y_buffer, y_offset, y_inc, &command_queue, &event) - elif dtype == np.dtype("complex128"): - err = CLBlastZhbmv(CLBlastLayoutRowMajor, triangle, n, k, cl_double2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, cl_double2(x=beta.real,y=beta.imag), y_buffer, y_offset, y_inc, &command_queue, &event) - else: - raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) - if err != CLBlastSuccess: - raise RuntimeError("PyCLBlast: 'CLBlastXhbmv' failed: %s" % get_status_message(err)) - return cl.Event.from_int_ptr(event) - -#################################################################################################### -# Hermitian packed matrix-vector 
multiplication: CHPMV/ZHPMV -#################################################################################################### - -cdef extern from "clblast_c.h": - CLBlastStatusCode CLBlastChpmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const cl_float2 alpha, const cl_mem ap_buffer, const size_t ap_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_float2 beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastZhpmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const cl_double2 alpha, const cl_mem ap_buffer, const size_t ap_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_double2 beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) - -def hpmv(queue, n, ap, x, y, ap_ld, x_inc = 1, y_inc = 1, alpha = 1.0, beta = 0.0, lower_triangle = False, ap_offset = 0, x_offset = 0, y_offset = 0): - dtype = check_dtype([ap, x, y], ["complex64", "complex128"]) - check_matrix(ap, "ap") - check_vector(x, "x") - check_vector(y, "y") - - cdef cl_mem ap_buffer = ap.base_data.int_ptr - cdef cl_mem x_buffer = x.base_data.int_ptr - cdef cl_mem y_buffer = y.base_data.int_ptr - - cdef cl_command_queue command_queue = queue.int_ptr - cdef cl_event event = NULL - triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper - - cdef CLBlastStatusCode err - if dtype == np.dtype("complex64"): - err = CLBlastChpmv(CLBlastLayoutRowMajor, triangle, n, cl_float2(x=alpha.real,y=alpha.imag), ap_buffer, ap_offset, x_buffer, x_offset, x_inc, cl_float2(x=beta.real,y=beta.imag), y_buffer, y_offset, y_inc, &command_queue, &event) - elif dtype == np.dtype("complex128"): - err = CLBlastZhpmv(CLBlastLayoutRowMajor, triangle, n, cl_double2(x=alpha.real,y=alpha.imag), ap_buffer, ap_offset, x_buffer, x_offset, x_inc, 
cl_double2(x=beta.real,y=beta.imag), y_buffer, y_offset, y_inc, &command_queue, &event) - else: - raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) - if err != CLBlastSuccess: - raise RuntimeError("PyCLBlast: 'CLBlastXhpmv' failed: %s" % get_status_message(err)) - return cl.Event.from_int_ptr(event) - -#################################################################################################### -# Symmetric matrix-vector multiplication: SSYMV/DSYMV/HSYMV -#################################################################################################### - -cdef extern from "clblast_c.h": - CLBlastStatusCode CLBlastSsymv(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const float alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const float beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastDsymv(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const double alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const double beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) - -def symv(queue, n, a, x, y, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, beta = 0.0, lower_triangle = False, a_offset = 0, x_offset = 0, y_offset = 0): - dtype = check_dtype([a, x, y], ["float32", "float64"]) - check_matrix(a, "a") - check_vector(x, "x") - check_vector(y, "y") - - cdef cl_mem a_buffer = a.base_data.int_ptr - cdef cl_mem x_buffer = x.base_data.int_ptr - cdef cl_mem y_buffer = y.base_data.int_ptr - - cdef cl_command_queue command_queue = queue.int_ptr - cdef cl_event event = NULL - triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper - - cdef CLBlastStatusCode err - if dtype == 
np.dtype("float32"): - err = CLBlastSsymv(CLBlastLayoutRowMajor, triangle, n, alpha, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, beta, y_buffer, y_offset, y_inc, &command_queue, &event) - elif dtype == np.dtype("float64"): - err = CLBlastDsymv(CLBlastLayoutRowMajor, triangle, n, alpha, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, beta, y_buffer, y_offset, y_inc, &command_queue, &event) - else: - raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) - if err != CLBlastSuccess: - raise RuntimeError("PyCLBlast: 'CLBlastXsymv' failed: %s" % get_status_message(err)) - return cl.Event.from_int_ptr(event) - -#################################################################################################### -# Symmetric banded matrix-vector multiplication: SSBMV/DSBMV/HSBMV -#################################################################################################### - -cdef extern from "clblast_c.h": - CLBlastStatusCode CLBlastSsbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const size_t k, const float alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const float beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastDsbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const size_t k, const double alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const double beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) - -def sbmv(queue, n, k, a, x, y, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, beta = 0.0, lower_triangle = False, a_offset = 0, x_offset = 0, y_offset = 0): - dtype = check_dtype([a, x, y], ["float32", "float64"]) - check_matrix(a, "a") - check_vector(x, "x") - 
check_vector(y, "y") - - cdef cl_mem a_buffer = a.base_data.int_ptr - cdef cl_mem x_buffer = x.base_data.int_ptr - cdef cl_mem y_buffer = y.base_data.int_ptr - - cdef cl_command_queue command_queue = queue.int_ptr - cdef cl_event event = NULL - triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper - - cdef CLBlastStatusCode err - if dtype == np.dtype("float32"): - err = CLBlastSsbmv(CLBlastLayoutRowMajor, triangle, n, k, alpha, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, beta, y_buffer, y_offset, y_inc, &command_queue, &event) - elif dtype == np.dtype("float64"): - err = CLBlastDsbmv(CLBlastLayoutRowMajor, triangle, n, k, alpha, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, beta, y_buffer, y_offset, y_inc, &command_queue, &event) - else: - raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) - if err != CLBlastSuccess: - raise RuntimeError("PyCLBlast: 'CLBlastXsbmv' failed: %s" % get_status_message(err)) - return cl.Event.from_int_ptr(event) - -#################################################################################################### -# Symmetric packed matrix-vector multiplication: SSPMV/DSPMV/HSPMV -#################################################################################################### - -cdef extern from "clblast_c.h": - CLBlastStatusCode CLBlastSspmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const float alpha, const cl_mem ap_buffer, const size_t ap_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const float beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastDspmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const double alpha, const cl_mem ap_buffer, const size_t ap_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const double beta, cl_mem y_buffer, const size_t y_offset, const size_t 
y_inc,cl_command_queue* queue, cl_event* event) - -def spmv(queue, n, ap, x, y, ap_ld, x_inc = 1, y_inc = 1, alpha = 1.0, beta = 0.0, lower_triangle = False, ap_offset = 0, x_offset = 0, y_offset = 0): - dtype = check_dtype([ap, x, y], ["float32", "float64"]) - check_matrix(ap, "ap") - check_vector(x, "x") - check_vector(y, "y") - - cdef cl_mem ap_buffer = ap.base_data.int_ptr - cdef cl_mem x_buffer = x.base_data.int_ptr - cdef cl_mem y_buffer = y.base_data.int_ptr - - cdef cl_command_queue command_queue = queue.int_ptr - cdef cl_event event = NULL - triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper - - cdef CLBlastStatusCode err - if dtype == np.dtype("float32"): - err = CLBlastSspmv(CLBlastLayoutRowMajor, triangle, n, alpha, ap_buffer, ap_offset, x_buffer, x_offset, x_inc, beta, y_buffer, y_offset, y_inc, &command_queue, &event) - elif dtype == np.dtype("float64"): - err = CLBlastDspmv(CLBlastLayoutRowMajor, triangle, n, alpha, ap_buffer, ap_offset, x_buffer, x_offset, x_inc, beta, y_buffer, y_offset, y_inc, &command_queue, &event) - else: - raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) - if err != CLBlastSuccess: - raise RuntimeError("PyCLBlast: 'CLBlastXspmv' failed: %s" % get_status_message(err)) - return cl.Event.from_int_ptr(event) - -#################################################################################################### -# Triangular matrix-vector multiplication: STRMV/DTRMV/CTRMV/ZTRMV/HTRMV -#################################################################################################### - -cdef extern from "clblast_c.h": - CLBlastStatusCode CLBlastStrmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode 
CLBlastDtrmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastCtrmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastZtrmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) - -def trmv(queue, n, a, x, a_ld, x_inc = 1, lower_triangle = False, a_transp = False, unit_diagonal = False, a_offset = 0, x_offset = 0): - dtype = check_dtype([a, x], ["float32", "float64", "complex64", "complex128"]) - check_matrix(a, "a") - check_vector(x, "x") - - cdef cl_mem a_buffer = a.base_data.int_ptr - cdef cl_mem x_buffer = x.base_data.int_ptr - - cdef cl_command_queue command_queue = queue.int_ptr - cdef cl_event event = NULL - triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper - a_transpose = CLBlastTransposeYes if a_transp else CLBlastTransposeNo - diagonal = CLBlastDiagonalUnit if unit_diagonal else CLBlastDiagonalNonUnit - - cdef CLBlastStatusCode err - if dtype == np.dtype("float32"): - err = CLBlastStrmv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, &command_queue, &event) - elif dtype == np.dtype("float64"): - err = CLBlastDtrmv(CLBlastLayoutRowMajor, triangle, 
a_transpose, diagonal, n, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, &command_queue, &event) - elif dtype == np.dtype("complex64"): - err = CLBlastCtrmv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, &command_queue, &event) - elif dtype == np.dtype("complex128"): - err = CLBlastZtrmv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, &command_queue, &event) - else: - raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) - if err != CLBlastSuccess: - raise RuntimeError("PyCLBlast: 'CLBlastXtrmv' failed: %s" % get_status_message(err)) - return cl.Event.from_int_ptr(event) - -#################################################################################################### -# Triangular banded matrix-vector multiplication: STBMV/DTBMV/CTBMV/ZTBMV/HTBMV -#################################################################################################### - -cdef extern from "clblast_c.h": - CLBlastStatusCode CLBlastStbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const size_t k, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastDtbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const size_t k, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastCtbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const size_t k, const cl_mem a_buffer, const size_t 
a_offset, const size_t a_ld, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastZtbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const size_t k, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) - -def tbmv(queue, n, k, a, x, a_ld, x_inc = 1, lower_triangle = False, a_transp = False, unit_diagonal = False, a_offset = 0, x_offset = 0): - dtype = check_dtype([a, x], ["float32", "float64", "complex64", "complex128"]) - check_matrix(a, "a") - check_vector(x, "x") - - cdef cl_mem a_buffer = a.base_data.int_ptr - cdef cl_mem x_buffer = x.base_data.int_ptr - - cdef cl_command_queue command_queue = queue.int_ptr - cdef cl_event event = NULL - triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper - a_transpose = CLBlastTransposeYes if a_transp else CLBlastTransposeNo - diagonal = CLBlastDiagonalUnit if unit_diagonal else CLBlastDiagonalNonUnit - - cdef CLBlastStatusCode err - if dtype == np.dtype("float32"): - err = CLBlastStbmv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n, k, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, &command_queue, &event) - elif dtype == np.dtype("float64"): - err = CLBlastDtbmv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n, k, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, &command_queue, &event) - elif dtype == np.dtype("complex64"): - err = CLBlastCtbmv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n, k, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, &command_queue, &event) - elif dtype == np.dtype("complex128"): - err = CLBlastZtbmv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n, k, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, &command_queue, &event) - else: - 
raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) - if err != CLBlastSuccess: - raise RuntimeError("PyCLBlast: 'CLBlastXtbmv' failed: %s" % get_status_message(err)) - return cl.Event.from_int_ptr(event) - -#################################################################################################### -# Triangular packed matrix-vector multiplication: STPMV/DTPMV/CTPMV/ZTPMV/HTPMV -#################################################################################################### - -cdef extern from "clblast_c.h": - CLBlastStatusCode CLBlastStpmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const cl_mem ap_buffer, const size_t ap_offset, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastDtpmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const cl_mem ap_buffer, const size_t ap_offset, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastCtpmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const cl_mem ap_buffer, const size_t ap_offset, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastZtpmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const cl_mem ap_buffer, const size_t ap_offset, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) - -def tpmv(queue, n, ap, x, ap_ld, x_inc = 1, lower_triangle = False, a_transp = False, unit_diagonal = False, ap_offset = 0, x_offset = 0): - dtype = 
check_dtype([ap, x], ["float32", "float64", "complex64", "complex128"]) - check_matrix(ap, "ap") - check_vector(x, "x") - - cdef cl_mem ap_buffer = ap.base_data.int_ptr - cdef cl_mem x_buffer = x.base_data.int_ptr - - cdef cl_command_queue command_queue = queue.int_ptr - cdef cl_event event = NULL - triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper - a_transpose = CLBlastTransposeYes if a_transp else CLBlastTransposeNo - diagonal = CLBlastDiagonalUnit if unit_diagonal else CLBlastDiagonalNonUnit - - cdef CLBlastStatusCode err - if dtype == np.dtype("float32"): - err = CLBlastStpmv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n, ap_buffer, ap_offset, x_buffer, x_offset, x_inc, &command_queue, &event) - elif dtype == np.dtype("float64"): - err = CLBlastDtpmv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n, ap_buffer, ap_offset, x_buffer, x_offset, x_inc, &command_queue, &event) - elif dtype == np.dtype("complex64"): - err = CLBlastCtpmv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n, ap_buffer, ap_offset, x_buffer, x_offset, x_inc, &command_queue, &event) - elif dtype == np.dtype("complex128"): - err = CLBlastZtpmv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n, ap_buffer, ap_offset, x_buffer, x_offset, x_inc, &command_queue, &event) - else: - raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) - if err != CLBlastSuccess: - raise RuntimeError("PyCLBlast: 'CLBlastXtpmv' failed: %s" % get_status_message(err)) - return cl.Event.from_int_ptr(event) - -#################################################################################################### -# Solves a triangular system of equations: STRSV/DTRSV/CTRSV/ZTRSV -#################################################################################################### - -cdef extern from "clblast_c.h": - CLBlastStatusCode CLBlastStrsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const 
CLBlastDiagonal diagonal, const size_t n, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastDtrsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastCtrsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastZtrsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) - -def trsv(queue, n, a, x, a_ld, x_inc = 1, lower_triangle = False, a_transp = False, unit_diagonal = False, a_offset = 0, x_offset = 0): - dtype = check_dtype([a, x], ["float32", "float64", "complex64", "complex128"]) - check_matrix(a, "a") - check_vector(x, "x") - - cdef cl_mem a_buffer = a.base_data.int_ptr - cdef cl_mem x_buffer = x.base_data.int_ptr - - cdef cl_command_queue command_queue = queue.int_ptr - cdef cl_event event = NULL - triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper - a_transpose = CLBlastTransposeYes if a_transp else CLBlastTransposeNo - diagonal = CLBlastDiagonalUnit if unit_diagonal else CLBlastDiagonalNonUnit - - cdef CLBlastStatusCode err - if dtype == np.dtype("float32"): - err = 
CLBlastStrsv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, &command_queue, &event) - elif dtype == np.dtype("float64"): - err = CLBlastDtrsv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, &command_queue, &event) - elif dtype == np.dtype("complex64"): - err = CLBlastCtrsv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, &command_queue, &event) - elif dtype == np.dtype("complex128"): - err = CLBlastZtrsv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, &command_queue, &event) - else: - raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) - if err != CLBlastSuccess: - raise RuntimeError("PyCLBlast: 'CLBlastXtrsv' failed: %s" % get_status_message(err)) - return cl.Event.from_int_ptr(event) - -#################################################################################################### -# General rank-1 matrix update: SGER/DGER/HGER -#################################################################################################### - -cdef extern from "clblast_c.h": - CLBlastStatusCode CLBlastSger(const CLBlastLayout layout, const size_t m, const size_t n, const float alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastDger(const CLBlastLayout layout, const size_t m, const size_t n, const double alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld,cl_command_queue* queue, cl_event* event) - -def ger(queue, m, n, x, y, a, a_ld, x_inc = 1, y_inc = 
1, alpha = 1.0, x_offset = 0, y_offset = 0, a_offset = 0): - dtype = check_dtype([x, y, a], ["float32", "float64"]) - check_vector(x, "x") - check_vector(y, "y") - check_matrix(a, "a") - - cdef cl_mem x_buffer = x.base_data.int_ptr - cdef cl_mem y_buffer = y.base_data.int_ptr - cdef cl_mem a_buffer = a.base_data.int_ptr - - cdef cl_command_queue command_queue = queue.int_ptr - cdef cl_event event = NULL - - cdef CLBlastStatusCode err - if dtype == np.dtype("float32"): - err = CLBlastSger(CLBlastLayoutRowMajor, m, n, alpha, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, a_buffer, a_offset, a_ld, &command_queue, &event) - elif dtype == np.dtype("float64"): - err = CLBlastDger(CLBlastLayoutRowMajor, m, n, alpha, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, a_buffer, a_offset, a_ld, &command_queue, &event) - else: - raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) - if err != CLBlastSuccess: - raise RuntimeError("PyCLBlast: 'CLBlastXger' failed: %s" % get_status_message(err)) - return cl.Event.from_int_ptr(event) - -#################################################################################################### -# General rank-1 complex matrix update: CGERU/ZGERU -#################################################################################################### - -cdef extern from "clblast_c.h": - CLBlastStatusCode CLBlastCgeru(const CLBlastLayout layout, const size_t m, const size_t n, const cl_float2 alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastZgeru(const CLBlastLayout layout, const size_t m, const size_t n, const cl_double2 alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem a_buffer, const size_t a_offset, const 
size_t a_ld,cl_command_queue* queue, cl_event* event) - -def geru(queue, m, n, x, y, a, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, x_offset = 0, y_offset = 0, a_offset = 0): - dtype = check_dtype([x, y, a], ["complex64", "complex128"]) - check_vector(x, "x") - check_vector(y, "y") - check_matrix(a, "a") - - cdef cl_mem x_buffer = x.base_data.int_ptr - cdef cl_mem y_buffer = y.base_data.int_ptr - cdef cl_mem a_buffer = a.base_data.int_ptr - - cdef cl_command_queue command_queue = queue.int_ptr - cdef cl_event event = NULL - - cdef CLBlastStatusCode err - if dtype == np.dtype("complex64"): - err = CLBlastCgeru(CLBlastLayoutRowMajor, m, n, cl_float2(x=alpha.real,y=alpha.imag), x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, a_buffer, a_offset, a_ld, &command_queue, &event) - elif dtype == np.dtype("complex128"): - err = CLBlastZgeru(CLBlastLayoutRowMajor, m, n, cl_double2(x=alpha.real,y=alpha.imag), x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, a_buffer, a_offset, a_ld, &command_queue, &event) - else: - raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) - if err != CLBlastSuccess: - raise RuntimeError("PyCLBlast: 'CLBlastXgeru' failed: %s" % get_status_message(err)) - return cl.Event.from_int_ptr(event) - -#################################################################################################### -# General rank-1 complex conjugated matrix update: CGERC/ZGERC -#################################################################################################### - -cdef extern from "clblast_c.h": - CLBlastStatusCode CLBlastCgerc(const CLBlastLayout layout, const size_t m, const size_t n, const cl_float2 alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastZgerc(const CLBlastLayout layout, const size_t m, const size_t n, const 
cl_double2 alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld,cl_command_queue* queue, cl_event* event) - -def gerc(queue, m, n, x, y, a, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, x_offset = 0, y_offset = 0, a_offset = 0): - dtype = check_dtype([x, y, a], ["complex64", "complex128"]) - check_vector(x, "x") - check_vector(y, "y") - check_matrix(a, "a") - - cdef cl_mem x_buffer = x.base_data.int_ptr - cdef cl_mem y_buffer = y.base_data.int_ptr - cdef cl_mem a_buffer = a.base_data.int_ptr - - cdef cl_command_queue command_queue = queue.int_ptr - cdef cl_event event = NULL - - cdef CLBlastStatusCode err - if dtype == np.dtype("complex64"): - err = CLBlastCgerc(CLBlastLayoutRowMajor, m, n, cl_float2(x=alpha.real,y=alpha.imag), x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, a_buffer, a_offset, a_ld, &command_queue, &event) - elif dtype == np.dtype("complex128"): - err = CLBlastZgerc(CLBlastLayoutRowMajor, m, n, cl_double2(x=alpha.real,y=alpha.imag), x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, a_buffer, a_offset, a_ld, &command_queue, &event) - else: - raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) - if err != CLBlastSuccess: - raise RuntimeError("PyCLBlast: 'CLBlastXgerc' failed: %s" % get_status_message(err)) - return cl.Event.from_int_ptr(event) - -#################################################################################################### -# Hermitian rank-1 matrix update: CHER/ZHER -#################################################################################################### - -cdef extern from "clblast_c.h": - CLBlastStatusCode CLBlastCher(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const float alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld,cl_command_queue* 
queue, cl_event* event) - CLBlastStatusCode CLBlastZher(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const double alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld,cl_command_queue* queue, cl_event* event) - -def her(queue, n, x, a, a_ld, x_inc = 1, alpha = 1.0, lower_triangle = False, x_offset = 0, a_offset = 0): - dtype = check_dtype([x, a], ["complex64", "complex128"]) - check_vector(x, "x") - check_matrix(a, "a") - - cdef cl_mem x_buffer = x.base_data.int_ptr - cdef cl_mem a_buffer = a.base_data.int_ptr - - cdef cl_command_queue command_queue = queue.int_ptr - cdef cl_event event = NULL - triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper - - cdef CLBlastStatusCode err - if dtype == np.dtype("complex64"): - err = CLBlastCher(CLBlastLayoutRowMajor, triangle, n, alpha, x_buffer, x_offset, x_inc, a_buffer, a_offset, a_ld, &command_queue, &event) - elif dtype == np.dtype("complex128"): - err = CLBlastZher(CLBlastLayoutRowMajor, triangle, n, alpha, x_buffer, x_offset, x_inc, a_buffer, a_offset, a_ld, &command_queue, &event) - else: - raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) - if err != CLBlastSuccess: - raise RuntimeError("PyCLBlast: 'CLBlastXher' failed: %s" % get_status_message(err)) - return cl.Event.from_int_ptr(event) - -#################################################################################################### -# Hermitian packed rank-1 matrix update: CHPR/ZHPR -#################################################################################################### - -cdef extern from "clblast_c.h": - CLBlastStatusCode CLBlastChpr(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const float alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem ap_buffer, const size_t ap_offset,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode 
CLBlastZhpr(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const double alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem ap_buffer, const size_t ap_offset,cl_command_queue* queue, cl_event* event) - -def hpr(queue, n, x, ap, ap_ld, x_inc = 1, alpha = 1.0, lower_triangle = False, x_offset = 0, ap_offset = 0): - dtype = check_dtype([x, ap], ["complex64", "complex128"]) - check_vector(x, "x") - check_matrix(ap, "ap") - - cdef cl_mem x_buffer = x.base_data.int_ptr - cdef cl_mem ap_buffer = ap.base_data.int_ptr - - cdef cl_command_queue command_queue = queue.int_ptr - cdef cl_event event = NULL - triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper - - cdef CLBlastStatusCode err - if dtype == np.dtype("complex64"): - err = CLBlastChpr(CLBlastLayoutRowMajor, triangle, n, alpha, x_buffer, x_offset, x_inc, ap_buffer, ap_offset, &command_queue, &event) - elif dtype == np.dtype("complex128"): - err = CLBlastZhpr(CLBlastLayoutRowMajor, triangle, n, alpha, x_buffer, x_offset, x_inc, ap_buffer, ap_offset, &command_queue, &event) - else: - raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) - if err != CLBlastSuccess: - raise RuntimeError("PyCLBlast: 'CLBlastXhpr' failed: %s" % get_status_message(err)) - return cl.Event.from_int_ptr(event) - -#################################################################################################### -# Hermitian rank-2 matrix update: CHER2/ZHER2 -#################################################################################################### - -cdef extern from "clblast_c.h": - CLBlastStatusCode CLBlastCher2(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const cl_float2 alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld,cl_command_queue* queue, cl_event* event) - 
CLBlastStatusCode CLBlastZher2(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const cl_double2 alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld,cl_command_queue* queue, cl_event* event) - -def her2(queue, n, x, y, a, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, lower_triangle = False, x_offset = 0, y_offset = 0, a_offset = 0): - dtype = check_dtype([x, y, a], ["complex64", "complex128"]) - check_vector(x, "x") - check_vector(y, "y") - check_matrix(a, "a") - - cdef cl_mem x_buffer = x.base_data.int_ptr - cdef cl_mem y_buffer = y.base_data.int_ptr - cdef cl_mem a_buffer = a.base_data.int_ptr - - cdef cl_command_queue command_queue = queue.int_ptr - cdef cl_event event = NULL - triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper - - cdef CLBlastStatusCode err - if dtype == np.dtype("complex64"): - err = CLBlastCher2(CLBlastLayoutRowMajor, triangle, n, cl_float2(x=alpha.real,y=alpha.imag), x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, a_buffer, a_offset, a_ld, &command_queue, &event) - elif dtype == np.dtype("complex128"): - err = CLBlastZher2(CLBlastLayoutRowMajor, triangle, n, cl_double2(x=alpha.real,y=alpha.imag), x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, a_buffer, a_offset, a_ld, &command_queue, &event) - else: - raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) - if err != CLBlastSuccess: - raise RuntimeError("PyCLBlast: 'CLBlastXher2' failed: %s" % get_status_message(err)) - return cl.Event.from_int_ptr(event) - -#################################################################################################### -# Hermitian packed rank-2 matrix update: CHPR2/ZHPR2 -#################################################################################################### - -cdef extern from "clblast_c.h": - CLBlastStatusCode 
CLBlastChpr2(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const cl_float2 alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem ap_buffer, const size_t ap_offset,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastZhpr2(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const cl_double2 alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem ap_buffer, const size_t ap_offset,cl_command_queue* queue, cl_event* event) - -def hpr2(queue, n, x, y, ap, ap_ld, x_inc = 1, y_inc = 1, alpha = 1.0, lower_triangle = False, x_offset = 0, y_offset = 0, ap_offset = 0): - dtype = check_dtype([x, y, ap], ["complex64", "complex128"]) - check_vector(x, "x") - check_vector(y, "y") - check_matrix(ap, "ap") - - cdef cl_mem x_buffer = x.base_data.int_ptr - cdef cl_mem y_buffer = y.base_data.int_ptr - cdef cl_mem ap_buffer = ap.base_data.int_ptr - - cdef cl_command_queue command_queue = queue.int_ptr - cdef cl_event event = NULL - triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper - - cdef CLBlastStatusCode err - if dtype == np.dtype("complex64"): - err = CLBlastChpr2(CLBlastLayoutRowMajor, triangle, n, cl_float2(x=alpha.real,y=alpha.imag), x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, ap_buffer, ap_offset, &command_queue, &event) - elif dtype == np.dtype("complex128"): - err = CLBlastZhpr2(CLBlastLayoutRowMajor, triangle, n, cl_double2(x=alpha.real,y=alpha.imag), x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, ap_buffer, ap_offset, &command_queue, &event) - else: - raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) - if err != CLBlastSuccess: - raise RuntimeError("PyCLBlast: 'CLBlastXhpr2' failed: %s" % get_status_message(err)) - return cl.Event.from_int_ptr(event) - 
####################################################################################################
# Symmetric rank-1 matrix update: SSYR/DSYR/HSYR
####################################################################################################

cdef extern from "clblast_c.h":
    CLBlastStatusCode CLBlastSsyr(const CLBlastLayout layout, const CLBlastTriangle triangle,
                                  const size_t n, const float alpha,
                                  const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
                                  cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
                                  cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastDsyr(const CLBlastLayout layout, const CLBlastTriangle triangle,
                                  const size_t n, const double alpha,
                                  const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
                                  cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
                                  cl_command_queue* queue, cl_event* event)

def syr(queue, n, x, a, a_ld, x_inc = 1, alpha = 1.0, lower_triangle = False, x_offset = 0, a_offset = 0):
    """
    xSYR: Symmetric rank-1 matrix update.

    Dispatches to CLBlastSsyr or CLBlastDsyr depending on the common data-type of `x` and `a`.
    The layout passed to CLBlast is always row-major. Only float32 and float64 are dispatched
    here; there is no half-precision branch.

    Args:
        queue: Command queue object exposing an `int_ptr` attribute
            (presumably a pyopencl CommandQueue -- confirm against callers).
        n: Forwarded to CLBlast as `n`.
        x: Input vector; must expose `base_data.int_ptr`. Validated with `check_vector`.
        a: Matrix that is updated; must expose `base_data.int_ptr`. Validated with
            `check_matrix`.
        a_ld: Forwarded to CLBlast as `a_ld`.
        x_inc: Forwarded to CLBlast as `x_inc`.
        alpha: Scalar forwarded to CLBlast as `alpha`.
        lower_triangle: Selects CLBlastTriangleLower when true, CLBlastTriangleUpper otherwise.
        x_offset: Forwarded to CLBlast as `x_offset`.
        a_offset: Forwarded to CLBlast as `a_offset`.

    Returns:
        The `cl.Event` built from the event handle that CLBlast filled in.

    Raises:
        ValueError: If the data-type is neither float32 nor float64.
        RuntimeError: If CLBlast returns a status other than CLBlastSuccess.
    """
    dtype = check_dtype([x, a], ["float32", "float64"])
    check_vector(x, "x")
    check_matrix(a, "a")

    # `int_ptr` is a Python integer. Cython will not turn a Python object into a C handle
    # implicitly, so go through an integer type first and then cast to the handle type.
    cdef cl_mem x_buffer = <cl_mem><ptrdiff_t>x.base_data.int_ptr
    cdef cl_mem a_buffer = <cl_mem><ptrdiff_t>a.base_data.int_ptr

    cdef cl_command_queue command_queue = <cl_command_queue><ptrdiff_t>queue.int_ptr
    cdef cl_event event = NULL
    triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper

    cdef CLBlastStatusCode err
    if dtype == np.dtype("float32"):
        err = CLBlastSsyr(CLBlastLayoutRowMajor, triangle, n, alpha,
                          x_buffer, x_offset, x_inc,
                          a_buffer, a_offset, a_ld,
                          &command_queue, &event)
    elif dtype == np.dtype("float64"):
        err = CLBlastDsyr(CLBlastLayoutRowMajor, triangle, n, alpha,
                          x_buffer, x_offset, x_inc,
                          a_buffer, a_offset, a_ld,
                          &command_queue, &event)
    else:
        raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
    if err != CLBlastSuccess:
        raise RuntimeError("PyCLBlast: 'CLBlastXsyr' failed: %s" % get_status_message(err))

    # The C handle has to be handed to Python as an integer.
    # NOTE(review): per the pyopencl docs `from_int_ptr` retains the handle by default; confirm
    # whether `retain=False` is wanted so the reference returned by CLBlast is not kept forever.
    return cl.Event.from_int_ptr(<ptrdiff_t>event)
####################################################################################################
# Symmetric packed rank-1 matrix update: SSPR/DSPR/HSPR
####################################################################################################

cdef extern from "clblast_c.h":
    CLBlastStatusCode CLBlastSspr(const CLBlastLayout layout, const CLBlastTriangle triangle,
                                  const size_t n, const float alpha,
                                  const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
                                  cl_mem ap_buffer, const size_t ap_offset,
                                  cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastDspr(const CLBlastLayout layout, const CLBlastTriangle triangle,
                                  const size_t n, const double alpha,
                                  const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
                                  cl_mem ap_buffer, const size_t ap_offset,
                                  cl_command_queue* queue, cl_event* event)

def spr(queue, n, x, ap, ap_ld, x_inc = 1, alpha = 1.0, lower_triangle = False, x_offset = 0, ap_offset = 0):
    """
    xSPR: Symmetric packed rank-1 matrix update.

    Dispatches to CLBlastSspr or CLBlastDspr depending on the common data-type of `x` and `ap`.
    The layout passed to CLBlast is always row-major. Only float32 and float64 are dispatched
    here; there is no half-precision branch.

    Args:
        queue: Command queue object exposing an `int_ptr` attribute
            (presumably a pyopencl CommandQueue -- confirm against callers).
        n: Forwarded to CLBlast as `n`.
        x: Input vector; must expose `base_data.int_ptr`. Validated with `check_vector`.
        ap: Packed matrix that is updated; must expose `base_data.int_ptr`. Validated with
            `check_matrix`.
        ap_ld: Accepted but unused: the CLBlast packed routines declared above take no leading
            dimension. Kept in the signature so existing positional callers keep working.
        x_inc: Forwarded to CLBlast as `x_inc`.
        alpha: Scalar forwarded to CLBlast as `alpha`.
        lower_triangle: Selects CLBlastTriangleLower when true, CLBlastTriangleUpper otherwise.
        x_offset: Forwarded to CLBlast as `x_offset`.
        ap_offset: Forwarded to CLBlast as `ap_offset`.

    Returns:
        The `cl.Event` built from the event handle that CLBlast filled in.

    Raises:
        ValueError: If the data-type is neither float32 nor float64.
        RuntimeError: If CLBlast returns a status other than CLBlastSuccess.
    """
    dtype = check_dtype([x, ap], ["float32", "float64"])
    check_vector(x, "x")
    check_matrix(ap, "ap")

    # `int_ptr` is a Python integer. Cython will not turn a Python object into a C handle
    # implicitly, so go through an integer type first and then cast to the handle type.
    cdef cl_mem x_buffer = <cl_mem><ptrdiff_t>x.base_data.int_ptr
    cdef cl_mem ap_buffer = <cl_mem><ptrdiff_t>ap.base_data.int_ptr

    cdef cl_command_queue command_queue = <cl_command_queue><ptrdiff_t>queue.int_ptr
    cdef cl_event event = NULL
    triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper

    cdef CLBlastStatusCode err
    if dtype == np.dtype("float32"):
        err = CLBlastSspr(CLBlastLayoutRowMajor, triangle, n, alpha,
                          x_buffer, x_offset, x_inc,
                          ap_buffer, ap_offset,
                          &command_queue, &event)
    elif dtype == np.dtype("float64"):
        err = CLBlastDspr(CLBlastLayoutRowMajor, triangle, n, alpha,
                          x_buffer, x_offset, x_inc,
                          ap_buffer, ap_offset,
                          &command_queue, &event)
    else:
        raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
    if err != CLBlastSuccess:
        raise RuntimeError("PyCLBlast: 'CLBlastXspr' failed: %s" % get_status_message(err))

    # The C handle has to be handed to Python as an integer.
    # NOTE(review): per the pyopencl docs `from_int_ptr` retains the handle by default; confirm
    # whether `retain=False` is wanted so the reference returned by CLBlast is not kept forever.
    return cl.Event.from_int_ptr(<ptrdiff_t>event)
-#################################################################################################### -# Symmetric rank-2 matrix update: SSYR2/DSYR2/HSYR2 -#################################################################################################### - -cdef extern from "clblast_c.h": - CLBlastStatusCode CLBlastSsyr2(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const float alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastDsyr2(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const double alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld,cl_command_queue* queue, cl_event* event) - -def syr2(queue, n, x, y, a, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, lower_triangle = False, x_offset = 0, y_offset = 0, a_offset = 0): - dtype = check_dtype([x, y, a], ["float32", "float64"]) - check_vector(x, "x") - check_vector(y, "y") - check_matrix(a, "a") - - cdef cl_mem x_buffer = x.base_data.int_ptr - cdef cl_mem y_buffer = y.base_data.int_ptr - cdef cl_mem a_buffer = a.base_data.int_ptr - - cdef cl_command_queue command_queue = queue.int_ptr - cdef cl_event event = NULL - triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper - - cdef CLBlastStatusCode err - if dtype == np.dtype("float32"): - err = CLBlastSsyr2(CLBlastLayoutRowMajor, triangle, n, alpha, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, a_buffer, a_offset, a_ld, &command_queue, &event) - elif dtype == np.dtype("float64"): - err = CLBlastDsyr2(CLBlastLayoutRowMajor, triangle, n, alpha, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, a_buffer, a_offset, a_ld, 
&command_queue, &event) - else: - raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) - if err != CLBlastSuccess: - raise RuntimeError("PyCLBlast: 'CLBlastXsyr2' failed: %s" % get_status_message(err)) - return cl.Event.from_int_ptr(event) - -#################################################################################################### -# Symmetric packed rank-2 matrix update: SSPR2/DSPR2/HSPR2 -#################################################################################################### - -cdef extern from "clblast_c.h": - CLBlastStatusCode CLBlastSspr2(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const float alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem ap_buffer, const size_t ap_offset,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastDspr2(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const double alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem ap_buffer, const size_t ap_offset,cl_command_queue* queue, cl_event* event) - -def spr2(queue, n, x, y, ap, ap_ld, x_inc = 1, y_inc = 1, alpha = 1.0, lower_triangle = False, x_offset = 0, y_offset = 0, ap_offset = 0): - dtype = check_dtype([x, y, ap], ["float32", "float64"]) - check_vector(x, "x") - check_vector(y, "y") - check_matrix(ap, "ap") - - cdef cl_mem x_buffer = x.base_data.int_ptr - cdef cl_mem y_buffer = y.base_data.int_ptr - cdef cl_mem ap_buffer = ap.base_data.int_ptr - - cdef cl_command_queue command_queue = queue.int_ptr - cdef cl_event event = NULL - triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper - - cdef CLBlastStatusCode err - if dtype == np.dtype("float32"): - err = CLBlastSspr2(CLBlastLayoutRowMajor, triangle, n, alpha, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, 
ap_buffer, ap_offset, &command_queue, &event) - elif dtype == np.dtype("float64"): - err = CLBlastDspr2(CLBlastLayoutRowMajor, triangle, n, alpha, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, ap_buffer, ap_offset, &command_queue, &event) - else: - raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) - if err != CLBlastSuccess: - raise RuntimeError("PyCLBlast: 'CLBlastXspr2' failed: %s" % get_status_message(err)) - return cl.Event.from_int_ptr(event) - -#################################################################################################### -# General matrix-matrix multiplication: SGEMM/DGEMM/CGEMM/ZGEMM/HGEMM -#################################################################################################### - -cdef extern from "clblast_c.h": - CLBlastStatusCode CLBlastSgemm(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, const size_t m, const size_t n, const size_t k, const float alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const float beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastDgemm(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, const size_t m, const size_t n, const size_t k, const double alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const double beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastCgemm(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, const size_t m, const size_t n, const size_t k, const cl_float2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t 
b_offset, const size_t b_ld, const cl_float2 beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastZgemm(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, const size_t m, const size_t n, const size_t k, const cl_double2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const cl_double2 beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event) - -def gemm(queue, m, n, k, a, b, c, a_ld, b_ld, c_ld, alpha = 1.0, beta = 0.0, a_transp = False, b_transp = False, a_offset = 0, b_offset = 0, c_offset = 0): - dtype = check_dtype([a, b, c], ["float32", "float64", "complex64", "complex128"]) - check_matrix(a, "a") - check_matrix(b, "b") - check_matrix(c, "c") - - cdef cl_mem a_buffer = a.base_data.int_ptr - cdef cl_mem b_buffer = b.base_data.int_ptr - cdef cl_mem c_buffer = c.base_data.int_ptr - - cdef cl_command_queue command_queue = queue.int_ptr - cdef cl_event event = NULL - a_transpose = CLBlastTransposeYes if a_transp else CLBlastTransposeNo - b_transpose = CLBlastTransposeYes if b_transp else CLBlastTransposeNo - - cdef CLBlastStatusCode err - if dtype == np.dtype("float32"): - err = CLBlastSgemm(CLBlastLayoutRowMajor, a_transpose, b_transpose, m, n, k, alpha, a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, beta, c_buffer, c_offset, c_ld, &command_queue, &event) - elif dtype == np.dtype("float64"): - err = CLBlastDgemm(CLBlastLayoutRowMajor, a_transpose, b_transpose, m, n, k, alpha, a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, beta, c_buffer, c_offset, c_ld, &command_queue, &event) - elif dtype == np.dtype("complex64"): - err = CLBlastCgemm(CLBlastLayoutRowMajor, a_transpose, b_transpose, m, n, k, cl_float2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, 
cl_float2(x=beta.real,y=beta.imag), c_buffer, c_offset, c_ld, &command_queue, &event) - elif dtype == np.dtype("complex128"): - err = CLBlastZgemm(CLBlastLayoutRowMajor, a_transpose, b_transpose, m, n, k, cl_double2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, cl_double2(x=beta.real,y=beta.imag), c_buffer, c_offset, c_ld, &command_queue, &event) - else: - raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) - if err != CLBlastSuccess: - raise RuntimeError("PyCLBlast: 'CLBlastXgemm' failed: %s" % get_status_message(err)) - return cl.Event.from_int_ptr(event) - -#################################################################################################### -# Symmetric matrix-matrix multiplication: SSYMM/DSYMM/CSYMM/ZSYMM/HSYMM -#################################################################################################### - -cdef extern from "clblast_c.h": - CLBlastStatusCode CLBlastSsymm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const size_t m, const size_t n, const float alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const float beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastDsymm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const size_t m, const size_t n, const double alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const double beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastCsymm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const size_t m, const size_t n, const cl_float2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, 
const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const cl_float2 beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastZsymm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const size_t m, const size_t n, const cl_double2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const cl_double2 beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event) - -def symm(queue, m, n, a, b, c, a_ld, b_ld, c_ld, alpha = 1.0, beta = 0.0, right_side = False, lower_triangle = False, a_offset = 0, b_offset = 0, c_offset = 0): - dtype = check_dtype([a, b, c], ["float32", "float64", "complex64", "complex128"]) - check_matrix(a, "a") - check_matrix(b, "b") - check_matrix(c, "c") - - cdef cl_mem a_buffer = a.base_data.int_ptr - cdef cl_mem b_buffer = b.base_data.int_ptr - cdef cl_mem c_buffer = c.base_data.int_ptr - - cdef cl_command_queue command_queue = queue.int_ptr - cdef cl_event event = NULL - side = CLBlastSideRight if right_side else CLBlastSideLeft - triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper - - cdef CLBlastStatusCode err - if dtype == np.dtype("float32"): - err = CLBlastSsymm(CLBlastLayoutRowMajor, side, triangle, m, n, alpha, a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, beta, c_buffer, c_offset, c_ld, &command_queue, &event) - elif dtype == np.dtype("float64"): - err = CLBlastDsymm(CLBlastLayoutRowMajor, side, triangle, m, n, alpha, a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, beta, c_buffer, c_offset, c_ld, &command_queue, &event) - elif dtype == np.dtype("complex64"): - err = CLBlastCsymm(CLBlastLayoutRowMajor, side, triangle, m, n, cl_float2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, cl_float2(x=beta.real,y=beta.imag), c_buffer, 
c_offset, c_ld, &command_queue, &event) - elif dtype == np.dtype("complex128"): - err = CLBlastZsymm(CLBlastLayoutRowMajor, side, triangle, m, n, cl_double2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, cl_double2(x=beta.real,y=beta.imag), c_buffer, c_offset, c_ld, &command_queue, &event) - else: - raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) - if err != CLBlastSuccess: - raise RuntimeError("PyCLBlast: 'CLBlastXsymm' failed: %s" % get_status_message(err)) - return cl.Event.from_int_ptr(event) - -#################################################################################################### -# Hermitian matrix-matrix multiplication: CHEMM/ZHEMM -#################################################################################################### - -cdef extern from "clblast_c.h": - CLBlastStatusCode CLBlastChemm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const size_t m, const size_t n, const cl_float2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const cl_float2 beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastZhemm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const size_t m, const size_t n, const cl_double2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const cl_double2 beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event) - -def hemm(queue, m, n, a, b, c, a_ld, b_ld, c_ld, alpha = 1.0, beta = 0.0, right_side = False, lower_triangle = False, a_offset = 0, b_offset = 0, c_offset = 0): - dtype = check_dtype([a, b, c], ["complex64", "complex128"]) - check_matrix(a, "a") - check_matrix(b, "b") - check_matrix(c, "c") - - 
cdef cl_mem a_buffer = a.base_data.int_ptr - cdef cl_mem b_buffer = b.base_data.int_ptr - cdef cl_mem c_buffer = c.base_data.int_ptr - - cdef cl_command_queue command_queue = queue.int_ptr - cdef cl_event event = NULL - side = CLBlastSideRight if right_side else CLBlastSideLeft - triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper - - cdef CLBlastStatusCode err - if dtype == np.dtype("complex64"): - err = CLBlastChemm(CLBlastLayoutRowMajor, side, triangle, m, n, cl_float2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, cl_float2(x=beta.real,y=beta.imag), c_buffer, c_offset, c_ld, &command_queue, &event) - elif dtype == np.dtype("complex128"): - err = CLBlastZhemm(CLBlastLayoutRowMajor, side, triangle, m, n, cl_double2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, cl_double2(x=beta.real,y=beta.imag), c_buffer, c_offset, c_ld, &command_queue, &event) - else: - raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) - if err != CLBlastSuccess: - raise RuntimeError("PyCLBlast: 'CLBlastXhemm' failed: %s" % get_status_message(err)) - return cl.Event.from_int_ptr(event) - -#################################################################################################### -# Rank-K update of a symmetric matrix: SSYRK/DSYRK/CSYRK/ZSYRK/HSYRK -#################################################################################################### - -cdef extern from "clblast_c.h": - CLBlastStatusCode CLBlastSsyrk(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const size_t n, const size_t k, const float alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const float beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastDsyrk(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const size_t n, 
const size_t k, const double alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const double beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastCsyrk(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const size_t n, const size_t k, const cl_float2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_float2 beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastZsyrk(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const size_t n, const size_t k, const cl_double2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_double2 beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event) - -def syrk(queue, n, k, a, c, a_ld, c_ld, alpha = 1.0, beta = 0.0, lower_triangle = False, a_transp = False, a_offset = 0, c_offset = 0): - dtype = check_dtype([a, c], ["float32", "float64", "complex64", "complex128"]) - check_matrix(a, "a") - check_matrix(c, "c") - - cdef cl_mem a_buffer = a.base_data.int_ptr - cdef cl_mem c_buffer = c.base_data.int_ptr - - cdef cl_command_queue command_queue = queue.int_ptr - cdef cl_event event = NULL - triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper - a_transpose = CLBlastTransposeYes if a_transp else CLBlastTransposeNo - - cdef CLBlastStatusCode err - if dtype == np.dtype("float32"): - err = CLBlastSsyrk(CLBlastLayoutRowMajor, triangle, a_transpose, n, k, alpha, a_buffer, a_offset, a_ld, beta, c_buffer, c_offset, c_ld, &command_queue, &event) - elif dtype == np.dtype("float64"): - err = CLBlastDsyrk(CLBlastLayoutRowMajor, triangle, a_transpose, n, k, alpha, a_buffer, a_offset, a_ld, beta, c_buffer, c_offset, c_ld, &command_queue, &event) - elif dtype == 
np.dtype("complex64"): - err = CLBlastCsyrk(CLBlastLayoutRowMajor, triangle, a_transpose, n, k, cl_float2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, cl_float2(x=beta.real,y=beta.imag), c_buffer, c_offset, c_ld, &command_queue, &event) - elif dtype == np.dtype("complex128"): - err = CLBlastZsyrk(CLBlastLayoutRowMajor, triangle, a_transpose, n, k, cl_double2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, cl_double2(x=beta.real,y=beta.imag), c_buffer, c_offset, c_ld, &command_queue, &event) - else: - raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) - if err != CLBlastSuccess: - raise RuntimeError("PyCLBlast: 'CLBlastXsyrk' failed: %s" % get_status_message(err)) - return cl.Event.from_int_ptr(event) - -#################################################################################################### -# Rank-K update of a hermitian matrix: CHERK/ZHERK -#################################################################################################### - -cdef extern from "clblast_c.h": - CLBlastStatusCode CLBlastCherk(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const size_t n, const size_t k, const float alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const float beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastZherk(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const size_t n, const size_t k, const double alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const double beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event) - -def herk(queue, n, k, a, c, a_ld, c_ld, alpha = 1.0, beta = 0.0, lower_triangle = False, a_transp = False, a_offset = 0, c_offset = 0): - dtype = check_dtype([a, c], ["complex64", "complex128"]) - check_matrix(a, "a") - 
check_matrix(c, "c") - - cdef cl_mem a_buffer = a.base_data.int_ptr - cdef cl_mem c_buffer = c.base_data.int_ptr - - cdef cl_command_queue command_queue = queue.int_ptr - cdef cl_event event = NULL - triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper - a_transpose = CLBlastTransposeYes if a_transp else CLBlastTransposeNo - - cdef CLBlastStatusCode err - if dtype == np.dtype("complex64"): - err = CLBlastCherk(CLBlastLayoutRowMajor, triangle, a_transpose, n, k, alpha, a_buffer, a_offset, a_ld, beta, c_buffer, c_offset, c_ld, &command_queue, &event) - elif dtype == np.dtype("complex128"): - err = CLBlastZherk(CLBlastLayoutRowMajor, triangle, a_transpose, n, k, alpha, a_buffer, a_offset, a_ld, beta, c_buffer, c_offset, c_ld, &command_queue, &event) - else: - raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) - if err != CLBlastSuccess: - raise RuntimeError("PyCLBlast: 'CLBlastXherk' failed: %s" % get_status_message(err)) - return cl.Event.from_int_ptr(event) - -#################################################################################################### -# Rank-2K update of a symmetric matrix: SSYR2K/DSYR2K/CSYR2K/ZSYR2K/HSYR2K -#################################################################################################### - -cdef extern from "clblast_c.h": - CLBlastStatusCode CLBlastSsyr2k(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose ab_transpose, const size_t n, const size_t k, const float alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const float beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastDsyr2k(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose ab_transpose, const size_t n, const size_t k, const double alpha, const cl_mem a_buffer, const size_t a_offset, const size_t 
a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const double beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastCsyr2k(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose ab_transpose, const size_t n, const size_t k, const cl_float2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const cl_float2 beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastZsyr2k(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose ab_transpose, const size_t n, const size_t k, const cl_double2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const cl_double2 beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event) - -def syr2k(queue, n, k, a, b, c, a_ld, b_ld, c_ld, alpha = 1.0, beta = 0.0, lower_triangle = False, ab_transp = False, a_offset = 0, b_offset = 0, c_offset = 0): - dtype = check_dtype([a, b, c], ["float32", "float64", "complex64", "complex128"]) - check_matrix(a, "a") - check_matrix(b, "b") - check_matrix(c, "c") - - cdef cl_mem a_buffer = a.base_data.int_ptr - cdef cl_mem b_buffer = b.base_data.int_ptr - cdef cl_mem c_buffer = c.base_data.int_ptr - - cdef cl_command_queue command_queue = queue.int_ptr - cdef cl_event event = NULL - triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper - ab_transpose = CLBlastTransposeYes if ab_transp else CLBlastTransposeNo - - cdef CLBlastStatusCode err - if dtype == np.dtype("float32"): - err = CLBlastSsyr2k(CLBlastLayoutRowMajor, triangle, ab_transpose, n, k, alpha, a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, beta, c_buffer, c_offset, c_ld, 
&command_queue, &event) - elif dtype == np.dtype("float64"): - err = CLBlastDsyr2k(CLBlastLayoutRowMajor, triangle, ab_transpose, n, k, alpha, a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, beta, c_buffer, c_offset, c_ld, &command_queue, &event) - elif dtype == np.dtype("complex64"): - err = CLBlastCsyr2k(CLBlastLayoutRowMajor, triangle, ab_transpose, n, k, cl_float2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, cl_float2(x=beta.real,y=beta.imag), c_buffer, c_offset, c_ld, &command_queue, &event) - elif dtype == np.dtype("complex128"): - err = CLBlastZsyr2k(CLBlastLayoutRowMajor, triangle, ab_transpose, n, k, cl_double2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, cl_double2(x=beta.real,y=beta.imag), c_buffer, c_offset, c_ld, &command_queue, &event) - else: - raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) - if err != CLBlastSuccess: - raise RuntimeError("PyCLBlast: 'CLBlastXsyr2k' failed: %s" % get_status_message(err)) - return cl.Event.from_int_ptr(event) - -#################################################################################################### -# Rank-2K update of a hermitian matrix: CHER2K/ZHER2K -#################################################################################################### - -cdef extern from "clblast_c.h": - CLBlastStatusCode CLBlastCher2k(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose ab_transpose, const size_t n, const size_t k, const cl_float2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const float beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastZher2k(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose ab_transpose, const size_t n, const size_t k, const cl_double2 alpha, const cl_mem 
a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const double beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event) - -def her2k(queue, n, k, a, b, c, a_ld, b_ld, c_ld, alpha = 1.0, beta = 0.0, lower_triangle = False, ab_transp = False, a_offset = 0, b_offset = 0, c_offset = 0): - dtype = check_dtype([a, b, c], ["complex64", "complex128"]) - check_matrix(a, "a") - check_matrix(b, "b") - check_matrix(c, "c") - - cdef cl_mem a_buffer = a.base_data.int_ptr - cdef cl_mem b_buffer = b.base_data.int_ptr - cdef cl_mem c_buffer = c.base_data.int_ptr - - cdef cl_command_queue command_queue = queue.int_ptr - cdef cl_event event = NULL - triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper - ab_transpose = CLBlastTransposeYes if ab_transp else CLBlastTransposeNo - - cdef CLBlastStatusCode err - if dtype == np.dtype("complex64"): - err = CLBlastCher2k(CLBlastLayoutRowMajor, triangle, ab_transpose, n, k, cl_float2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, beta, c_buffer, c_offset, c_ld, &command_queue, &event) - elif dtype == np.dtype("complex128"): - err = CLBlastZher2k(CLBlastLayoutRowMajor, triangle, ab_transpose, n, k, cl_double2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, beta, c_buffer, c_offset, c_ld, &command_queue, &event) - else: - raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) - if err != CLBlastSuccess: - raise RuntimeError("PyCLBlast: 'CLBlastXher2k' failed: %s" % get_status_message(err)) - return cl.Event.from_int_ptr(event) - -#################################################################################################### -# Triangular matrix-matrix multiplication: STRMM/DTRMM/CTRMM/ZTRMM/HTRMM -#################################################################################################### - -cdef extern from 
"clblast_c.h": - CLBlastStatusCode CLBlastStrmm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t m, const size_t n, const float alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem b_buffer, const size_t b_offset, const size_t b_ld,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastDtrmm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t m, const size_t n, const double alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem b_buffer, const size_t b_offset, const size_t b_ld,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastCtrmm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t m, const size_t n, const cl_float2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem b_buffer, const size_t b_offset, const size_t b_ld,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastZtrmm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t m, const size_t n, const cl_double2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem b_buffer, const size_t b_offset, const size_t b_ld,cl_command_queue* queue, cl_event* event) - -def trmm(queue, m, n, a, b, a_ld, b_ld, alpha = 1.0, right_side = False, lower_triangle = False, a_transp = False, unit_diagonal = False, a_offset = 0, b_offset = 0): - dtype = check_dtype([a, b], ["float32", "float64", "complex64", "complex128"]) - check_matrix(a, "a") - check_matrix(b, "b") - - cdef cl_mem a_buffer = a.base_data.int_ptr - cdef cl_mem b_buffer = 
b.base_data.int_ptr - - cdef cl_command_queue command_queue = queue.int_ptr - cdef cl_event event = NULL - side = CLBlastSideRight if right_side else CLBlastSideLeft - triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper - a_transpose = CLBlastTransposeYes if a_transp else CLBlastTransposeNo - diagonal = CLBlastDiagonalUnit if unit_diagonal else CLBlastDiagonalNonUnit - - cdef CLBlastStatusCode err - if dtype == np.dtype("float32"): - err = CLBlastStrmm(CLBlastLayoutRowMajor, side, triangle, a_transpose, diagonal, m, n, alpha, a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, &command_queue, &event) - elif dtype == np.dtype("float64"): - err = CLBlastDtrmm(CLBlastLayoutRowMajor, side, triangle, a_transpose, diagonal, m, n, alpha, a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, &command_queue, &event) - elif dtype == np.dtype("complex64"): - err = CLBlastCtrmm(CLBlastLayoutRowMajor, side, triangle, a_transpose, diagonal, m, n, cl_float2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, &command_queue, &event) - elif dtype == np.dtype("complex128"): - err = CLBlastZtrmm(CLBlastLayoutRowMajor, side, triangle, a_transpose, diagonal, m, n, cl_double2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, &command_queue, &event) - else: - raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) - if err != CLBlastSuccess: - raise RuntimeError("PyCLBlast: 'CLBlastXtrmm' failed: %s" % get_status_message(err)) - return cl.Event.from_int_ptr(event) - -#################################################################################################### -# Solves a triangular system of equations: STRSM/DTRSM/CTRSM/ZTRSM -#################################################################################################### - -cdef extern from "clblast_c.h": - CLBlastStatusCode CLBlastStrsm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const 
CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t m, const size_t n, const float alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem b_buffer, const size_t b_offset, const size_t b_ld,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastDtrsm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t m, const size_t n, const double alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem b_buffer, const size_t b_offset, const size_t b_ld,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastCtrsm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t m, const size_t n, const cl_float2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem b_buffer, const size_t b_offset, const size_t b_ld,cl_command_queue* queue, cl_event* event) - CLBlastStatusCode CLBlastZtrsm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t m, const size_t n, const cl_double2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem b_buffer, const size_t b_offset, const size_t b_ld,cl_command_queue* queue, cl_event* event) - -def trsm(queue, m, n, a, b, a_ld, b_ld, alpha = 1.0, right_side = False, lower_triangle = False, a_transp = False, unit_diagonal = False, a_offset = 0, b_offset = 0): - dtype = check_dtype([a, b], ["float32", "float64", "complex64", "complex128"]) - check_matrix(a, "a") - check_matrix(b, "b") - - cdef cl_mem a_buffer = a.base_data.int_ptr - cdef cl_mem b_buffer = b.base_data.int_ptr - - cdef cl_command_queue command_queue = queue.int_ptr - cdef cl_event event = NULL - side = CLBlastSideRight if 
right_side else CLBlastSideLeft - triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper - a_transpose = CLBlastTransposeYes if a_transp else CLBlastTransposeNo - diagonal = CLBlastDiagonalUnit if unit_diagonal else CLBlastDiagonalNonUnit - - cdef CLBlastStatusCode err - if dtype == np.dtype("float32"): - err = CLBlastStrsm(CLBlastLayoutRowMajor, side, triangle, a_transpose, diagonal, m, n, alpha, a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, &command_queue, &event) - elif dtype == np.dtype("float64"): - err = CLBlastDtrsm(CLBlastLayoutRowMajor, side, triangle, a_transpose, diagonal, m, n, alpha, a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, &command_queue, &event) - elif dtype == np.dtype("complex64"): - err = CLBlastCtrsm(CLBlastLayoutRowMajor, side, triangle, a_transpose, diagonal, m, n, cl_float2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, &command_queue, &event) - elif dtype == np.dtype("complex128"): - err = CLBlastZtrsm(CLBlastLayoutRowMajor, side, triangle, a_transpose, diagonal, m, n, cl_double2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, &command_queue, &event) - else: - raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) - if err != CLBlastSuccess: - raise RuntimeError("PyCLBlast: 'CLBlastXtrsm' failed: %s" % get_status_message(err)) - return cl.Event.from_int_ptr(event) - -#################################################################################################### diff --git a/src/pyclblast/setup.py b/src/pyclblast/setup.py index 2a90f16d..d5916e06 100644 --- a/src/pyclblast/setup.py +++ b/src/pyclblast/setup.py @@ -14,7 +14,7 @@ ext_modules = list() ext_modules.append( Extension( "pyclblast", - ["pyclblast/pyclblast.pyx"], + ["src/pyclblast.pyx"], libraries=["clblast"], language="c++" ) @@ -27,10 +27,19 @@ setup( author_email="web@cedricnugteren.nl", url="https://github.com/cnugteren/clblast", description="Python bindings for 
CLBlast, the tuned OpenCL BLAS library", - license="ApacheV2", - requires=["pyopencl","cython"], - packages=["pyclblast"], + license="Apache Software License", + requires=["numpy", "pyopencl", "cython"], + package_dir={'': 'src'}, scripts=[], ext_modules=ext_modules, cmdclass={"build_ext": build_ext}, + classifiers=[ + 'Development Status :: 4 - Beta', + 'Intended Audience :: Developers', + 'Topic :: Software Development :: Libraries', + 'License :: OSI Approved :: Apache Software License', + 'Programming Language :: Python :: 2', + 'Programming Language :: Python :: 3', + ], + keywords="OpenCL BLAS CLBlast GEMM matrix-multiplication" ) diff --git a/src/pyclblast/src/pyclblast.pyx b/src/pyclblast/src/pyclblast.pyx new file mode 100644 index 00000000..9529400c --- /dev/null +++ b/src/pyclblast/src/pyclblast.pyx @@ -0,0 +1,1901 @@ + +#################################################################################################### +# This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. +# +# Author(s): +# Cedric Nugteren +# +# This file defines the Python interface to CLBlast. 
It is inspired by: +# https://github.com/hunse/pyopencl_blas +# +#################################################################################################### + +import numpy as np +import pyopencl as cl +from pyopencl.array import Array + +from libcpp cimport bool + +#################################################################################################### +# CLBlast and OpenCL data-types +#################################################################################################### + +cdef extern from "clblast_c.h": + + # Status codes + ctypedef enum CLBlastStatusCode: + CLBlastSuccess + CLBlastOpenCLCompilerNotAvailable + CLBlastTempBufferAllocFailure + CLBlastOpenCLOutOfResources + CLBlastOpenCLOutOfHostMemory + CLBlastOpenCLBuildProgramFailure + CLBlastInvalidValue + CLBlastInvalidCommandQueue + CLBlastInvalidMemObject + CLBlastInvalidBinary + CLBlastInvalidBuildOptions + CLBlastInvalidProgram + CLBlastInvalidProgramExecutable + CLBlastInvalidKernelName + CLBlastInvalidKernelDefinition + CLBlastInvalidKernel + CLBlastInvalidArgIndex + CLBlastInvalidArgValue + CLBlastInvalidArgSize + CLBlastInvalidKernelArgs + CLBlastInvalidLocalNumDimensions + CLBlastInvalidLocalThreadsTotal + CLBlastInvalidLocalThreadsDim + CLBlastInvalidGlobalOffset + CLBlastInvalidEventWaitList + CLBlastInvalidEvent + CLBlastInvalidOperation + CLBlastInvalidBufferSize + CLBlastInvalidGlobalWorkSize + CLBlastNotImplemented + CLBlastInvalidMatrixA + CLBlastInvalidMatrixB + CLBlastInvalidMatrixC + CLBlastInvalidVectorX + CLBlastInvalidVectorY + CLBlastInvalidDimension + CLBlastInvalidLeadDimA + CLBlastInvalidLeadDimB + CLBlastInvalidLeadDimC + CLBlastInvalidIncrementX + CLBlastInvalidIncrementY + CLBlastInsufficientMemoryA + CLBlastInsufficientMemoryB + CLBlastInsufficientMemoryC + CLBlastInsufficientMemoryX + CLBlastInsufficientMemoryY + CLBlastInvalidBatchCount + CLBlastInvalidOverrideKernel + CLBlastMissingOverrideParameter + CLBlastInvalidLocalMemUsage + 
# Remaining CLBlastStatusCode members and declarations of the `cdef extern from "clblast_c.h"` block
        CLBlastNoHalfPrecision
        CLBlastNoDoublePrecision
        CLBlastInvalidVectorScalar
        CLBlastInsufficientMemoryScalar
        CLBlastDatabaseError
        CLBlastUnknownError
        CLBlastUnexpectedError

    # OpenCL data-types
    ctypedef float cl_float
    ctypedef double cl_double
    ctypedef unsigned int cl_uint
    ctypedef struct cl_float2:
        cl_float x
        cl_float y
    ctypedef struct cl_double2:
        cl_double x
        cl_double y

    # OpenCL special data-types
    struct _cl_mem:
        pass
    struct _cl_command_queue:
        pass
    struct _cl_event:
        pass
    ctypedef _cl_mem* cl_mem
    ctypedef _cl_command_queue* cl_command_queue
    ctypedef _cl_event* cl_event

    # Matrix layout and transpose types
    ctypedef enum CLBlastLayout:
        CLBlastLayoutRowMajor
        CLBlastLayoutColMajor
    ctypedef enum CLBlastTranspose:
        CLBlastTransposeNo
        CLBlastTransposeYes
        CLBlastTransposeConjugate
    ctypedef enum CLBlastTriangle:
        CLBlastTriangleUpper
        CLBlastTriangleLower
    ctypedef enum CLBlastDiagonal:
        CLBlastDiagonalNonUnit
        CLBlastDiagonalUnit
    ctypedef enum CLBlastSide:
        CLBlastSideLeft
        CLBlastSideRight

    # Precision enum
    ctypedef enum CLBlastPrecision:
        CLBlastPrecisionSingle
        CLBlastPrecisionDouble
        CLBlastPrecisionComplexSingle
        CLBlastPrecisionComplexDouble

# Translates status codes into readable messages
cdef get_status_message(CLBlastStatusCode status):
    """
    Returns a human-readable string for a CLBlast status code. Codes that are not listed below yield
    a generic "unrecognized" message that includes the numeric value.
    """
    if status == CLBlastSuccess:
        return "CLBlastSuccess"
    if status == CLBlastOpenCLCompilerNotAvailable:
        return "CLBlastOpenCLCompilerNotAvailable: CL_COMPILER_NOT_AVAILABLE"
    if status == CLBlastTempBufferAllocFailure:
        return "CLBlastTempBufferAllocFailure: CL_MEM_OBJECT_ALLOCATION_FAILURE"
    if status == CLBlastOpenCLOutOfResources:
        return "CLBlastOpenCLOutOfResources: CL_OUT_OF_RESOURCES"
    if status == CLBlastOpenCLOutOfHostMemory:
        return "CLBlastOpenCLOutOfHostMemory: CL_OUT_OF_HOST_MEMORY"
    if status == CLBlastOpenCLBuildProgramFailure:
        return "CLBlastOpenCLBuildProgramFailure: CL_BUILD_PROGRAM_FAILURE: OpenCL compilation error"
    if status == CLBlastInvalidValue:
        return "CLBlastInvalidValue: CL_INVALID_VALUE"
    if status == CLBlastInvalidCommandQueue:
        return "CLBlastInvalidCommandQueue: CL_INVALID_COMMAND_QUEUE"
    if status == CLBlastInvalidMemObject:
        return "CLBlastInvalidMemObject: CL_INVALID_MEM_OBJECT"
    if status == CLBlastInvalidBinary:
        return "CLBlastInvalidBinary: CL_INVALID_BINARY"
    if status == CLBlastInvalidBuildOptions:
        return "CLBlastInvalidBuildOptions: CL_INVALID_BUILD_OPTIONS"
    if status == CLBlastInvalidProgram:
        return "CLBlastInvalidProgram: CL_INVALID_PROGRAM"
    if status == CLBlastInvalidProgramExecutable:
        return "CLBlastInvalidProgramExecutable: CL_INVALID_PROGRAM_EXECUTABLE"
    if status == CLBlastInvalidKernelName:
        return "CLBlastInvalidKernelName: CL_INVALID_KERNEL_NAME"
    if status == CLBlastInvalidKernelDefinition:
        return "CLBlastInvalidKernelDefinition: CL_INVALID_KERNEL_DEFINITION"
    if status == CLBlastInvalidKernel:
        return "CLBlastInvalidKernel: CL_INVALID_KERNEL"
    if status == CLBlastInvalidArgIndex:
        return "CLBlastInvalidArgIndex: CL_INVALID_ARG_INDEX"
    if status == CLBlastInvalidArgValue:
        return "CLBlastInvalidArgValue: CL_INVALID_ARG_VALUE"
    if status == CLBlastInvalidArgSize:
        return "CLBlastInvalidArgSize: CL_INVALID_ARG_SIZE"
    if status == CLBlastInvalidKernelArgs:
        return "CLBlastInvalidKernelArgs: CL_INVALID_KERNEL_ARGS"
    if status == CLBlastInvalidLocalNumDimensions:
        return "CLBlastInvalidLocalNumDimensions: CL_INVALID_WORK_DIMENSION: Too many thread dimensions"
    if status == CLBlastInvalidLocalThreadsTotal:
        return "CLBlastInvalidLocalThreadsTotal: CL_INVALID_WORK_GROUP_SIZE: Too many threads in total"
    if status == CLBlastInvalidLocalThreadsDim:
        return "CLBlastInvalidLocalThreadsDim: CL_INVALID_WORK_ITEM_SIZE: ... or for a specific dimension"
    if status == CLBlastInvalidGlobalOffset:
        return "CLBlastInvalidGlobalOffset: CL_INVALID_GLOBAL_OFFSET"
    if status == CLBlastInvalidEventWaitList:
        return "CLBlastInvalidEventWaitList: CL_INVALID_EVENT_WAIT_LIST"
    if status == CLBlastInvalidEvent:
        return "CLBlastInvalidEvent: CL_INVALID_EVENT"
    if status == CLBlastInvalidOperation:
        return "CLBlastInvalidOperation: CL_INVALID_OPERATION"
    if status == CLBlastInvalidBufferSize:
        return "CLBlastInvalidBufferSize: CL_INVALID_BUFFER_SIZE"
    if status == CLBlastInvalidGlobalWorkSize:
        return "CLBlastInvalidGlobalWorkSize: CL_INVALID_GLOBAL_WORK_SIZE"
    if status == CLBlastNotImplemented:
        return "CLBlastNotImplemented: Routine or functionality not implemented yet"
    if status == CLBlastInvalidMatrixA:
        return "CLBlastInvalidMatrixA: Matrix A is not a valid OpenCL buffer"
    if status == CLBlastInvalidMatrixB:
        return "CLBlastInvalidMatrixB: Matrix B is not a valid OpenCL buffer"
    if status == CLBlastInvalidMatrixC:
        return "CLBlastInvalidMatrixC: Matrix C is not a valid OpenCL buffer"
    if status == CLBlastInvalidVectorX:
        return "CLBlastInvalidVectorX: Vector X is not a valid OpenCL buffer"
    if status == CLBlastInvalidVectorY:
        return "CLBlastInvalidVectorY: Vector Y is not a valid OpenCL buffer"
    if status == CLBlastInvalidDimension:
        return "CLBlastInvalidDimension: Dimensions M, N, and K have to be larger than zero"
    if status == CLBlastInvalidLeadDimA:
        return "CLBlastInvalidLeadDimA: LD of A is smaller than the matrix's first dimension"
    if status == CLBlastInvalidLeadDimB:
        return "CLBlastInvalidLeadDimB: LD of B is smaller than the matrix's first dimension"
    if status == CLBlastInvalidLeadDimC:
        return "CLBlastInvalidLeadDimC: LD of C is smaller than the matrix's first dimension"
    if status == CLBlastInvalidIncrementX:
        return "CLBlastInvalidIncrementX: Increment of vector X cannot be zero"
    if status == CLBlastInvalidIncrementY:
        return "CLBlastInvalidIncrementY: Increment of vector Y cannot be zero"
    if status == CLBlastInsufficientMemoryA:
        return "CLBlastInsufficientMemoryA: Matrix A's OpenCL buffer is too small"
    if status == CLBlastInsufficientMemoryB:
        return "CLBlastInsufficientMemoryB: Matrix B's OpenCL buffer is too small"
    if status == CLBlastInsufficientMemoryC:
        return "CLBlastInsufficientMemoryC: Matrix C's OpenCL buffer is too small"
    if status == CLBlastInsufficientMemoryX:
        return "CLBlastInsufficientMemoryX: Vector X's OpenCL buffer is too small"
    if status == CLBlastInsufficientMemoryY:
        return "CLBlastInsufficientMemoryY: Vector Y's OpenCL buffer is too small"
    if status == CLBlastInvalidBatchCount:
        return "CLBlastInvalidBatchCount: The batch count needs to be positive"
    if status == CLBlastInvalidOverrideKernel:
        return "CLBlastInvalidOverrideKernel: Trying to override parameters for an invalid kernel"
    if status == CLBlastMissingOverrideParameter:
        return "CLBlastMissingOverrideParameter: Missing override parameter(s) for the target kernel"
    if status == CLBlastInvalidLocalMemUsage:
        return "CLBlastInvalidLocalMemUsage: Not enough local memory available on this device"
    if status == CLBlastNoHalfPrecision:
        return "CLBlastNoHalfPrecision: Half precision (16-bits) not supported by the device"
    if status == CLBlastNoDoublePrecision:
        return "CLBlastNoDoublePrecision: Double precision (64-bits) not supported by the device"
    if status == CLBlastInvalidVectorScalar:
        return "CLBlastInvalidVectorScalar: The unit-sized vector is not a valid OpenCL buffer"
    if status == CLBlastInsufficientMemoryScalar:
        return "CLBlastInsufficientMemoryScalar: The unit-sized vector's OpenCL buffer is too small"
    if status == CLBlastDatabaseError:
        return "CLBlastDatabaseError: Entry for the device was not found in the database"
    if status == CLBlastUnknownError:
        return "CLBlastUnknownError: A catch-all error code representing an unspecified error"
    if status == CLBlastUnexpectedError:
        return "CLBlastUnexpectedError: A catch-all error code representing an unexpected exception"
    return "PyCLBlast: unrecognized CLBlast status code (code %d)" % status

####################################################################################################
# Generic helpers
####################################################################################################

# Size in bytes of one element for each supported NumPy dtype
dtype_size = {np.dtype('float32'): 4,
              np.dtype('float64'): 8,
              np.dtype('complex64'): 8,
              np.dtype('complex128'): 16}

def dtypes_str(dtypes):
    """
    Formats a sequence of dtype names for use in an error message: a single entry is quoted, several
    entries are rendered as "one of [...]".
    """
    if len(dtypes) == 1:
        return "'%s'" % dtypes[0]
    # Wrap in a 1-tuple so that a tuple argument is not unpacked by the % operator
    return "one of %s" % (dtypes,)


def check_dtype(args, dtypes):
    """
    Verifies that all `args` share one dtype and that this dtype is listed in `dtypes`.

    Returns the common dtype (taken from `args[0]`); raises ValueError otherwise.
    """
    dtype = args[0].dtype
    if not all(arg.dtype == dtype for arg in args):
        raise ValueError("PyCLBlast: All arguments must have the same dtype (%s)" % dtypes_str(dtypes))
    if dtype not in dtypes:
        raise ValueError("PyCLBlast: Data type must be %s" % dtypes_str(dtypes))
    return dtype


def check_array(a, ndim, name):
    """
    Raises ValueError unless `a` is a PyOpenCL Array with exactly `ndim` dimensions. `name` is only
    used in the error message.
    """
    if not isinstance(a, Array):
        raise ValueError("PyCLBlast: '%s' must be a PyOpenCL Array" % name)
    if len(a.shape) != ndim:
        raise ValueError("PyCLBlast: '%s' must have %d dimensions (got %d)" % (name, ndim, len(a.shape)))


def check_matrix(a, name):
    """Raises ValueError unless `a` is a two-dimensional PyOpenCL Array."""
    check_array(a, 2, name)


def check_vector(a, name):
    """Raises ValueError unless `a` is a one-dimensional PyOpenCL Array."""
    check_array(a, 1, name)


####################################################################################################
# Swap two vectors: SSWAP/DSWAP/CSWAP/ZSWAP/HSWAP
####################################################################################################

cdef extern from "clblast_c.h":
    CLBlastStatusCode CLBlastSswap(const size_t n, cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastDswap(const size_t n, cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastCswap(const size_t n, cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastZswap(const size_t n, cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_command_queue* queue, cl_event* event)

def swap(queue, n, x, y, x_inc = 1, y_inc = 1, x_offset = 0, y_offset = 0):
    """
    xSWAP: Swap two vectors

    Dispatches on the common dtype of `x` and `y` to CLBlastSswap/Dswap/Cswap/Zswap.

    queue: object whose `int_ptr` is handed to CLBlast as the OpenCL command queue
    n, x_inc, y_inc, x_offset, y_offset: forwarded unchanged to the CLBlast routine
    x, y: one-dimensional PyOpenCL Arrays of the same dtype

    Returns the event produced by CLBlast, wrapped through `cl.Event.from_int_ptr`.
    Raises ValueError for invalid arguments and RuntimeError when CLBlast reports a failure.
    """
    dtype = check_dtype([x, y], ["float32", "float64", "complex64", "complex128"])
    check_vector(x, "x")
    check_vector(y, "y")

    # `int_ptr` is a Python integer holding the raw handle: go through size_t to get a C pointer
    cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr
    cdef cl_mem y_buffer = <cl_mem><size_t>y.base_data.int_ptr

    cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
    cdef cl_event event = NULL

    cdef CLBlastStatusCode err
    if dtype == np.dtype("float32"):
        err = CLBlastSswap(n, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event)
    elif dtype == np.dtype("float64"):
        err = CLBlastDswap(n, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event)
    elif dtype == np.dtype("complex64"):
        err = CLBlastCswap(n, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event)
    elif dtype == np.dtype("complex128"):
        err = CLBlastZswap(n, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event)
    else:
        raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
    if err != CLBlastSuccess:
        raise RuntimeError("PyCLBlast: 'CLBlastXswap' failed: %s" % get_status_message(err))
    # NOTE(review): from_int_ptr may retain the handle by default; confirm whether retain=False is
    # needed so that no extra reference to the CLBlast-created event is kept alive.
    return cl.Event.from_int_ptr(<size_t>event)

####################################################################################################
# Vector scaling: SSCAL/DSCAL/CSCAL/ZSCAL/HSCAL
####################################################################################################

cdef extern from "clblast_c.h":
    CLBlastStatusCode CLBlastSscal(const size_t n, const float alpha, cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastDscal(const size_t n, const double alpha, cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastCscal(const size_t n, const cl_float2 alpha, cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastZscal(const size_t n, const cl_double2 alpha, cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_command_queue* queue, cl_event* event)

def scal(queue, n, x, x_inc = 1, alpha = 1.0, x_offset = 0):
    """
    xSCAL: Vector scaling

    Dispatches on the dtype of `x` to CLBlastSscal/Dscal/Cscal/Zscal. For the complex variants the
    `real` and `imag` attributes of `alpha` are packed into a cl_float2/cl_double2.

    Returns the event produced by CLBlast, wrapped through `cl.Event.from_int_ptr`.
    Raises ValueError for invalid arguments and RuntimeError when CLBlast reports a failure.
    """
    dtype = check_dtype([x], ["float32", "float64", "complex64", "complex128"])
    check_vector(x, "x")

    # `int_ptr` is a Python integer holding the raw handle: go through size_t to get a C pointer
    cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr

    cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
    cdef cl_event event = NULL

    cdef CLBlastStatusCode err
    if dtype == np.dtype("float32"):
        err = CLBlastSscal(n, alpha, x_buffer, x_offset, x_inc, &command_queue, &event)
    elif dtype == np.dtype("float64"):
        err = CLBlastDscal(n, alpha, x_buffer, x_offset, x_inc, &command_queue, &event)
    elif dtype == np.dtype("complex64"):
        err = CLBlastCscal(n, cl_float2(x=alpha.real,y=alpha.imag), x_buffer, x_offset, x_inc, &command_queue, &event)
    elif dtype == np.dtype("complex128"):
        err = CLBlastZscal(n, cl_double2(x=alpha.real,y=alpha.imag), x_buffer, x_offset, x_inc, &command_queue, &event)
    else:
        raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
    if err != CLBlastSuccess:
        raise RuntimeError("PyCLBlast: 'CLBlastXscal' failed: %s" % get_status_message(err))
    return cl.Event.from_int_ptr(<size_t>event)

####################################################################################################
# Vector copy: SCOPY/DCOPY/CCOPY/ZCOPY/HCOPY
####################################################################################################

cdef extern from "clblast_c.h":
    CLBlastStatusCode CLBlastScopy(const size_t n, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastDcopy(const size_t n, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastCcopy(const size_t n, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastZcopy(const size_t n, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_command_queue* queue, cl_event* event)

def copy(queue, n, x, y, x_inc = 1, y_inc = 1, x_offset = 0, y_offset = 0):
    """
    xCOPY: Vector copy

    Dispatches on the common dtype of `x` and `y` to CLBlastScopy/Dcopy/Ccopy/Zcopy.

    queue: object whose `int_ptr` is handed to CLBlast as the OpenCL command queue
    n, x_inc, y_inc, x_offset, y_offset: forwarded unchanged to the CLBlast routine
    x, y: one-dimensional PyOpenCL Arrays of the same dtype

    Returns the event produced by CLBlast, wrapped through `cl.Event.from_int_ptr`.
    Raises ValueError for invalid arguments and RuntimeError when CLBlast reports a failure.
    """
    dtype = check_dtype([x, y], ["float32", "float64", "complex64", "complex128"])
    check_vector(x, "x")
    check_vector(y, "y")

    # `int_ptr` is a Python integer holding the raw handle: go through size_t to get a C pointer
    cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr
    cdef cl_mem y_buffer = <cl_mem><size_t>y.base_data.int_ptr

    cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
    cdef cl_event event = NULL

    cdef CLBlastStatusCode err
    if dtype == np.dtype("float32"):
        err = CLBlastScopy(n, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event)
    elif dtype == np.dtype("float64"):
        err = CLBlastDcopy(n, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event)
    elif dtype == np.dtype("complex64"):
        err = CLBlastCcopy(n, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event)
    elif dtype == np.dtype("complex128"):
        err = CLBlastZcopy(n, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event)
    else:
        raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
    if err != CLBlastSuccess:
        raise RuntimeError("PyCLBlast: 'CLBlastXcopy' failed: %s" % get_status_message(err))
    # NOTE(review): from_int_ptr may retain the handle by default; confirm whether retain=False is
    # needed so that no extra reference to the CLBlast-created event is kept alive.
    return cl.Event.from_int_ptr(<size_t>event)

####################################################################################################
# Vector-times-constant plus vector: SAXPY/DAXPY/CAXPY/ZAXPY/HAXPY
####################################################################################################

cdef extern from "clblast_c.h":
    CLBlastStatusCode CLBlastSaxpy(const size_t n, const float alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastDaxpy(const size_t n, const double alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastCaxpy(const size_t n, const cl_float2 alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastZaxpy(const size_t n, const cl_double2 alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_command_queue* queue, cl_event* event)

def axpy(queue, n, x, y, x_inc = 1, y_inc = 1, alpha = 1.0, x_offset = 0, y_offset = 0):
    """
    xAXPY: Vector-times-constant plus vector

    Dispatches on the common dtype of `x` and `y` to CLBlastSaxpy/Daxpy/Caxpy/Zaxpy. For the complex
    variants the `real` and `imag` attributes of `alpha` are packed into a cl_float2/cl_double2.

    Returns the event produced by CLBlast, wrapped through `cl.Event.from_int_ptr`.
    Raises ValueError for invalid arguments and RuntimeError when CLBlast reports a failure.
    """
    dtype = check_dtype([x, y], ["float32", "float64", "complex64", "complex128"])
    check_vector(x, "x")
    check_vector(y, "y")

    # `int_ptr` is a Python integer holding the raw handle: go through size_t to get a C pointer
    cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr
    cdef cl_mem y_buffer = <cl_mem><size_t>y.base_data.int_ptr

    cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
    cdef cl_event event = NULL

    cdef CLBlastStatusCode err
    if dtype == np.dtype("float32"):
        err = CLBlastSaxpy(n, alpha, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event)
    elif dtype == np.dtype("float64"):
        err = CLBlastDaxpy(n, alpha, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event)
    elif dtype == np.dtype("complex64"):
        err = CLBlastCaxpy(n, cl_float2(x=alpha.real,y=alpha.imag), x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event)
    elif dtype == np.dtype("complex128"):
        err = CLBlastZaxpy(n, cl_double2(x=alpha.real,y=alpha.imag), x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event)
    else:
        raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
    if err != CLBlastSuccess:
        raise RuntimeError("PyCLBlast: 'CLBlastXaxpy' failed: %s" % get_status_message(err))
    return cl.Event.from_int_ptr(<size_t>event)

####################################################################################################
# Dot product of two vectors: SDOT/DDOT/HDOT
####################################################################################################

cdef extern from "clblast_c.h":
    CLBlastStatusCode CLBlastSdot(const size_t n, cl_mem dot_buffer, const size_t dot_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastDdot(const size_t n, cl_mem dot_buffer, const size_t dot_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_command_queue* queue, cl_event* event)

def dot(queue, n, x, y, dot, x_inc = 1, y_inc = 1, x_offset = 0, y_offset = 0, dot_offset = 0):
    """
    xDOT: Dot product of two vectors

    Dispatches on the common dtype of `x`, `y` and `dot` to CLBlastSdot/Ddot. The result is written
    by CLBlast into the buffer of `dot` at `dot_offset`.

    Returns the event produced by CLBlast, wrapped through `cl.Event.from_int_ptr`.
    Raises ValueError for invalid arguments and RuntimeError when CLBlast reports a failure.
    """
    dtype = check_dtype([x, y, dot], ["float32", "float64"])
    check_vector(x, "x")
    check_vector(y, "y")
    # NOTE(review): the result buffer is required to be two-dimensional here; confirm intended
    check_matrix(dot, "dot")

    # `int_ptr` is a Python integer holding the raw handle: go through size_t to get a C pointer
    cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr
    cdef cl_mem y_buffer = <cl_mem><size_t>y.base_data.int_ptr
    cdef cl_mem dot_buffer = <cl_mem><size_t>dot.base_data.int_ptr

    cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
    cdef cl_event event = NULL

    cdef CLBlastStatusCode err
    if dtype == np.dtype("float32"):
        err = CLBlastSdot(n, dot_buffer, dot_offset, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event)
    elif dtype == np.dtype("float64"):
        err = CLBlastDdot(n, dot_buffer, dot_offset, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event)
    else:
        raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
    if err != CLBlastSuccess:
        raise RuntimeError("PyCLBlast: 'CLBlastXdot' failed: %s" % get_status_message(err))
    return cl.Event.from_int_ptr(<size_t>event)

####################################################################################################
# Dot product of two complex vectors: CDOTU/ZDOTU
####################################################################################################

cdef extern from "clblast_c.h":
    CLBlastStatusCode CLBlastCdotu(const size_t n, cl_mem dot_buffer, const size_t dot_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastZdotu(const size_t n, cl_mem dot_buffer, const size_t dot_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_command_queue* queue, cl_event* event)

def dotu(queue, n, x, y, dot, x_inc = 1, y_inc = 1, x_offset = 0, y_offset = 0, dot_offset = 0):
    """
    xDOTU: Dot product of two complex vectors

    Dispatches on the common dtype of `x`, `y` and `dot` to CLBlastCdotu/Zdotu. The result is
    written by CLBlast into the buffer of `dot` at `dot_offset`.

    Returns the event produced by CLBlast, wrapped through `cl.Event.from_int_ptr`.
    Raises ValueError for invalid arguments and RuntimeError when CLBlast reports a failure.
    """
    dtype = check_dtype([x, y, dot], ["complex64", "complex128"])
    check_vector(x, "x")
    check_vector(y, "y")
    # NOTE(review): the result buffer is required to be two-dimensional here; confirm intended
    check_matrix(dot, "dot")

    # `int_ptr` is a Python integer holding the raw handle: go through size_t to get a C pointer
    cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr
    cdef cl_mem y_buffer = <cl_mem><size_t>y.base_data.int_ptr
    cdef cl_mem dot_buffer = <cl_mem><size_t>dot.base_data.int_ptr

    cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
    cdef cl_event event = NULL

    cdef CLBlastStatusCode err
    if dtype == np.dtype("complex64"):
        err = CLBlastCdotu(n, dot_buffer, dot_offset, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event)
    elif dtype == np.dtype("complex128"):
        err = CLBlastZdotu(n, dot_buffer, dot_offset, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event)
    else:
        raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
    if err != CLBlastSuccess:
        raise RuntimeError("PyCLBlast: 'CLBlastXdotu' failed: %s" % get_status_message(err))
    return cl.Event.from_int_ptr(<size_t>event)

####################################################################################################
# Dot product of two complex vectors, one conjugated: CDOTC/ZDOTC
####################################################################################################

cdef extern from "clblast_c.h":
    CLBlastStatusCode CLBlastCdotc(const size_t n, cl_mem dot_buffer, const size_t dot_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastZdotc(const size_t n, cl_mem dot_buffer, const size_t dot_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_command_queue* queue, cl_event* event)

def dotc(queue, n, x, y, dot, x_inc = 1, y_inc = 1, x_offset = 0, y_offset = 0, dot_offset = 0):
    """
    xDOTC: Dot product of two complex vectors, one conjugated

    Dispatches on the common dtype of `x`, `y` and `dot` to CLBlastCdotc/Zdotc. The result is
    written by CLBlast into the buffer of `dot` at `dot_offset`.

    Returns the event produced by CLBlast, wrapped through `cl.Event.from_int_ptr`.
    Raises ValueError for invalid arguments and RuntimeError when CLBlast reports a failure.
    """
    dtype = check_dtype([x, y, dot], ["complex64", "complex128"])
    check_vector(x, "x")
    check_vector(y, "y")
    # NOTE(review): the result buffer is required to be two-dimensional here; confirm intended
    check_matrix(dot, "dot")

    # `int_ptr` is a Python integer holding the raw handle: go through size_t to get a C pointer
    cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr
    cdef cl_mem y_buffer = <cl_mem><size_t>y.base_data.int_ptr
    cdef cl_mem dot_buffer = <cl_mem><size_t>dot.base_data.int_ptr

    cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
    cdef cl_event event = NULL

    cdef CLBlastStatusCode err
    if dtype == np.dtype("complex64"):
        err = CLBlastCdotc(n, dot_buffer, dot_offset, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event)
    elif dtype == np.dtype("complex128"):
        err = CLBlastZdotc(n, dot_buffer, dot_offset, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event)
    else:
        raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
    if err != CLBlastSuccess:
        raise RuntimeError("PyCLBlast: 'CLBlastXdotc' failed: %s" % get_status_message(err))
    return cl.Event.from_int_ptr(<size_t>event)

####################################################################################################
# Euclidian norm of a vector: SNRM2/DNRM2/ScNRM2/DzNRM2/HNRM2
####################################################################################################

cdef extern from "clblast_c.h":
    CLBlastStatusCode CLBlastSnrm2(const size_t n, cl_mem nrm2_buffer, const size_t nrm2_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastDnrm2(const size_t n, cl_mem nrm2_buffer, const size_t nrm2_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastScnrm2(const size_t n, cl_mem nrm2_buffer, const size_t nrm2_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastDznrm2(const size_t n, cl_mem nrm2_buffer, const size_t nrm2_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_command_queue* queue, cl_event* event)

def nrm2(queue, n, x, nrm2, x_inc = 1, x_offset = 0, nrm2_offset = 0):
    """
    xNRM2: Euclidian norm of a vector

    Dispatches on the common dtype of `x` and `nrm2` to CLBlastSnrm2/Dnrm2/Scnrm2/Dznrm2. The result
    is written by CLBlast into the buffer of `nrm2` at `nrm2_offset`.

    Returns the event produced by CLBlast, wrapped through `cl.Event.from_int_ptr`.
    Raises ValueError for invalid arguments and RuntimeError when CLBlast reports a failure.
    """
    dtype = check_dtype([x, nrm2], ["float32", "float64", "complex64", "complex128"])
    check_vector(x, "x")
    # NOTE(review): the result buffer is required to be two-dimensional here; confirm intended
    check_matrix(nrm2, "nrm2")

    # `int_ptr` is a Python integer holding the raw handle: go through size_t to get a C pointer
    cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr
    cdef cl_mem nrm2_buffer = <cl_mem><size_t>nrm2.base_data.int_ptr

    cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
    cdef cl_event event = NULL

    cdef CLBlastStatusCode err
    if dtype == np.dtype("float32"):
        err = CLBlastSnrm2(n, nrm2_buffer, nrm2_offset, x_buffer, x_offset, x_inc, &command_queue, &event)
    elif dtype == np.dtype("float64"):
        err = CLBlastDnrm2(n, nrm2_buffer, nrm2_offset, x_buffer, x_offset, x_inc, &command_queue, &event)
    elif dtype == np.dtype("complex64"):
        err = CLBlastScnrm2(n, nrm2_buffer, nrm2_offset, x_buffer, x_offset, x_inc, &command_queue, &event)
    elif dtype == np.dtype("complex128"):
        err = CLBlastDznrm2(n, nrm2_buffer, nrm2_offset, x_buffer, x_offset, x_inc, &command_queue, &event)
    else:
        raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
    if err != CLBlastSuccess:
        raise RuntimeError("PyCLBlast: 'CLBlastXnrm2' failed: %s" % get_status_message(err))
    return cl.Event.from_int_ptr(<size_t>event)

####################################################################################################
# Absolute sum of values in a vector: SASUM/DASUM/ScASUM/DzASUM/HASUM
####################################################################################################

cdef extern from "clblast_c.h":
    CLBlastStatusCode CLBlastSasum(const size_t n, cl_mem asum_buffer, const size_t asum_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastDasum(const size_t n, cl_mem asum_buffer, const size_t asum_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastScasum(const size_t n, cl_mem asum_buffer, const size_t asum_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastDzasum(const size_t n, cl_mem asum_buffer, const size_t asum_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_command_queue* queue, cl_event* event)

def asum(queue, n, x, asum, x_inc = 1, x_offset = 0, asum_offset = 0):
    """
    xASUM: Absolute sum of values in a vector

    Dispatches on the common dtype of `x` and `asum` to CLBlastSasum/Dasum/Scasum/Dzasum. The result
    is written by CLBlast into the buffer of `asum` at `asum_offset`.

    Returns the event produced by CLBlast, wrapped through `cl.Event.from_int_ptr`.
    Raises ValueError for invalid arguments and RuntimeError when CLBlast reports a failure.
    """
    dtype = check_dtype([x, asum], ["float32", "float64", "complex64", "complex128"])
    check_vector(x, "x")
    # NOTE(review): the result buffer is required to be two-dimensional here; confirm intended
    check_matrix(asum, "asum")

    # `int_ptr` is a Python integer holding the raw handle: go through size_t to get a C pointer
    cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr
    cdef cl_mem asum_buffer = <cl_mem><size_t>asum.base_data.int_ptr

    cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
    cdef cl_event event = NULL

    cdef CLBlastStatusCode err
    if dtype == np.dtype("float32"):
        err = CLBlastSasum(n, asum_buffer, asum_offset, x_buffer, x_offset, x_inc, &command_queue, &event)
    elif dtype == np.dtype("float64"):
        err = CLBlastDasum(n, asum_buffer, asum_offset, x_buffer, x_offset, x_inc, &command_queue, &event)
    elif dtype == np.dtype("complex64"):
        err = CLBlastScasum(n, asum_buffer, asum_offset, x_buffer, x_offset, x_inc, &command_queue, &event)
    elif dtype == np.dtype("complex128"):
        err = CLBlastDzasum(n, asum_buffer, asum_offset, x_buffer, x_offset, x_inc, &command_queue, &event)
    else:
        raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
    if err != CLBlastSuccess:
        raise RuntimeError("PyCLBlast: 'CLBlastXasum' failed: %s" % get_status_message(err))
    return cl.Event.from_int_ptr(<size_t>event)

####################################################################################################
# Sum of values in a vector (non-BLAS function): SSUM/DSUM/ScSUM/DzSUM/HSUM
####################################################################################################

cdef extern from "clblast_c.h":
    CLBlastStatusCode CLBlastSsum(const size_t n, cl_mem sum_buffer, const size_t sum_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastDsum(const size_t n, cl_mem sum_buffer, const size_t sum_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastScsum(const size_t n, cl_mem sum_buffer, const size_t sum_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastDzsum(const size_t n, cl_mem sum_buffer, const size_t sum_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_command_queue* queue, cl_event* event)

def sum(queue, n, x, sum, x_inc = 1, x_offset = 0, sum_offset = 0):
    """
    xSUM: Sum of values in a vector (non-BLAS function)

    Dispatches on the common dtype of `x` and `sum` to CLBlastSsum/Dsum/Scsum/Dzsum. The result is
    written by CLBlast into the buffer of `sum` at `sum_offset`.

    Returns the event produced by CLBlast, wrapped through `cl.Event.from_int_ptr`.
    Raises ValueError for invalid arguments and RuntimeError when CLBlast reports a failure.
    """
    dtype = check_dtype([x, sum], ["float32", "float64", "complex64", "complex128"])
    check_vector(x, "x")
    # NOTE(review): the result buffer is required to be two-dimensional here; confirm intended
    check_matrix(sum, "sum")

    # `int_ptr` is a Python integer holding the raw handle: go through size_t to get a C pointer
    cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr
    cdef cl_mem sum_buffer = <cl_mem><size_t>sum.base_data.int_ptr

    cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
    cdef cl_event event = NULL

    cdef CLBlastStatusCode err
    if dtype == np.dtype("float32"):
        err = CLBlastSsum(n, sum_buffer, sum_offset, x_buffer, x_offset, x_inc, &command_queue, &event)
    elif dtype == np.dtype("float64"):
        err = CLBlastDsum(n, sum_buffer, sum_offset, x_buffer, x_offset, x_inc, &command_queue, &event)
    elif dtype == np.dtype("complex64"):
        err = CLBlastScsum(n, sum_buffer, sum_offset, x_buffer, x_offset, x_inc, &command_queue, &event)
    elif dtype == np.dtype("complex128"):
        err = CLBlastDzsum(n, sum_buffer, sum_offset, x_buffer, x_offset, x_inc, &command_queue, &event)
    else:
        raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
    if err != CLBlastSuccess:
        raise RuntimeError("PyCLBlast: 'CLBlastXsum' failed: %s" % get_status_message(err))
    return cl.Event.from_int_ptr(<size_t>event)

####################################################################################################
# Index of absolute maximum value in a vector: iSAMAX/iDAMAX/iCAMAX/iZAMAX/iHAMAX
####################################################################################################

cdef extern from "clblast_c.h":
    CLBlastStatusCode CLBlastiSamax(const size_t n, cl_mem imax_buffer, const size_t imax_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastiDamax(const size_t n, cl_mem imax_buffer, const size_t imax_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastiCamax(const size_t n, cl_mem imax_buffer, const size_t imax_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastiZamax(const size_t n, cl_mem imax_buffer, const size_t imax_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_command_queue* queue, cl_event* event)

def amax(queue, n, x, imax, x_inc = 1, x_offset = 0, imax_offset = 0):
    """
    xAMAX: Index of absolute maximum value in a vector

    Dispatches on the common dtype of `x` and `imax` to CLBlastiSamax/iDamax/iCamax/iZamax. The
    result is written by CLBlast into the buffer of `imax` at `imax_offset`.

    Returns the event produced by CLBlast, wrapped through `cl.Event.from_int_ptr`.
    Raises ValueError for invalid arguments and RuntimeError when CLBlast reports a failure.
    """
    # NOTE(review): the index buffer is forced to share the (floating-point) dtype of `x` and to be
    # two-dimensional; confirm this matches what the CLBlast routine writes.
    dtype = check_dtype([x, imax], ["float32", "float64", "complex64", "complex128"])
    check_vector(x, "x")
    check_matrix(imax, "imax")

    # `int_ptr` is a Python integer holding the raw handle: go through size_t to get a C pointer
    cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr
    cdef cl_mem imax_buffer = <cl_mem><size_t>imax.base_data.int_ptr

    cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
    cdef cl_event event = NULL

    cdef CLBlastStatusCode err
    if dtype == np.dtype("float32"):
        err = CLBlastiSamax(n, imax_buffer, imax_offset, x_buffer, x_offset, x_inc, &command_queue, &event)
    elif dtype == np.dtype("float64"):
        err = CLBlastiDamax(n, imax_buffer, imax_offset, x_buffer, x_offset, x_inc, &command_queue, &event)
    elif dtype == np.dtype("complex64"):
        err = CLBlastiCamax(n, imax_buffer, imax_offset, x_buffer, x_offset, x_inc, &command_queue, &event)
    elif dtype == np.dtype("complex128"):
        err = CLBlastiZamax(n, imax_buffer, imax_offset, x_buffer, x_offset, x_inc, &command_queue, &event)
    else:
        raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
    if err != CLBlastSuccess:
        raise RuntimeError("PyCLBlast: 'CLBlastXamax' failed: %s" % get_status_message(err))
    return cl.Event.from_int_ptr(<size_t>event)

####################################################################################################
# Index of absolute minimum value in a vector (non-BLAS function):
# iSAMIN/iDAMIN/iCAMIN/iZAMIN/iHAMIN
####################################################################################################

cdef extern from "clblast_c.h":
    CLBlastStatusCode CLBlastiSamin(const size_t n, cl_mem imin_buffer, const size_t imin_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastiDamin(const size_t n, cl_mem imin_buffer, const size_t imin_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastiCamin(const size_t n, cl_mem imin_buffer, const size_t imin_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastiZamin(const size_t n, cl_mem imin_buffer, const size_t imin_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_command_queue* queue, cl_event* event)

def amin(queue, n, x, imin, x_inc = 1, x_offset = 0, imin_offset = 0):
    """
    xAMIN: Index of absolute minimum value in a vector (non-BLAS function)

    Dispatches on the common dtype of `x` and `imin` to CLBlastiSamin/iDamin/iCamin/iZamin. The
    result is written by CLBlast into the buffer of `imin` at `imin_offset`.

    queue: object whose `int_ptr` is handed to CLBlast as the OpenCL command queue
    n, x_inc, x_offset, imin_offset: forwarded unchanged to the CLBlast routine

    Returns the event produced by CLBlast, wrapped through `cl.Event.from_int_ptr`.
    Raises ValueError for invalid arguments and RuntimeError when CLBlast reports a failure.
    """
    # NOTE(review): the index buffer is forced to share the (floating-point) dtype of `x` and to be
    # two-dimensional; confirm this matches what the CLBlast routine writes.
    dtype = check_dtype([x, imin], ["float32", "float64", "complex64", "complex128"])
    check_vector(x, "x")
    check_matrix(imin, "imin")

    # `int_ptr` is a Python integer holding the raw handle: go through size_t to get a C pointer
    cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr
    cdef cl_mem imin_buffer = <cl_mem><size_t>imin.base_data.int_ptr

    cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
    cdef cl_event event = NULL

    cdef CLBlastStatusCode err
    if dtype == np.dtype("float32"):
        err = CLBlastiSamin(n, imin_buffer, imin_offset, x_buffer, x_offset, x_inc, &command_queue, &event)
    elif dtype == np.dtype("float64"):
        err = CLBlastiDamin(n, imin_buffer, imin_offset, x_buffer, x_offset, x_inc, &command_queue, &event)
    elif dtype == np.dtype("complex64"):
        err = CLBlastiCamin(n, imin_buffer, imin_offset, x_buffer, x_offset, x_inc, &command_queue, &event)
    elif dtype == np.dtype("complex128"):
        err = CLBlastiZamin(n, imin_buffer, imin_offset, x_buffer, x_offset, x_inc, &command_queue, &event)
    else:
        raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
    if err != CLBlastSuccess:
        raise RuntimeError("PyCLBlast: 'CLBlastXamin' failed: %s" % get_status_message(err))
    # NOTE(review): from_int_ptr may retain the handle by default; confirm whether retain=False is
    # needed so that no extra reference to the CLBlast-created event is kept alive.
    return cl.Event.from_int_ptr(<size_t>event)

####################################################################################################
# Index of maximum value in a vector (non-BLAS function): iSMAX/iDMAX/iCMAX/iZMAX/iHMAX
####################################################################################################

cdef extern from "clblast_c.h":
    CLBlastStatusCode CLBlastiSmax(const size_t n, cl_mem imax_buffer, const size_t imax_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastiDmax(const size_t n, cl_mem imax_buffer, const size_t imax_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastiCmax(const size_t n, cl_mem imax_buffer, const size_t imax_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastiZmax(const size_t n, cl_mem imax_buffer, const size_t imax_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_command_queue* queue, cl_event* event)

def max(queue, n, x, imax, x_inc = 1, x_offset = 0, imax_offset = 0):
    """
    xMAX: Index of maximum value in a vector (non-BLAS function)

    Dispatches on the common dtype of `x` and `imax` to CLBlastiSmax/iDmax/iCmax/iZmax. The result
    is written by CLBlast into the buffer of `imax` at `imax_offset`.

    Returns the event produced by CLBlast, wrapped through `cl.Event.from_int_ptr`.
    Raises ValueError for invalid arguments and RuntimeError when CLBlast reports a failure.
    """
    # NOTE(review): the index buffer is forced to share the (floating-point) dtype of `x` and to be
    # two-dimensional; confirm this matches what the CLBlast routine writes.
    dtype = check_dtype([x, imax], ["float32", "float64", "complex64", "complex128"])
    check_vector(x, "x")
    check_matrix(imax, "imax")

    # `int_ptr` is a Python integer holding the raw handle: go through size_t to get a C pointer
    cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr
    cdef cl_mem imax_buffer = <cl_mem><size_t>imax.base_data.int_ptr

    cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
    cdef cl_event event = NULL

    cdef CLBlastStatusCode err
    if dtype == np.dtype("float32"):
        err = CLBlastiSmax(n, imax_buffer, imax_offset, x_buffer, x_offset, x_inc, &command_queue, &event)
    elif dtype == np.dtype("float64"):
        err = CLBlastiDmax(n, imax_buffer, imax_offset, x_buffer, x_offset, x_inc, &command_queue, &event)
    elif dtype == np.dtype("complex64"):
        err = CLBlastiCmax(n, imax_buffer, imax_offset, x_buffer, x_offset, x_inc, &command_queue, &event)
    elif dtype == np.dtype("complex128"):
        err = CLBlastiZmax(n, imax_buffer, imax_offset, x_buffer, x_offset, x_inc, &command_queue, &event)
    else:
        raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
    if err != CLBlastSuccess:
        raise RuntimeError("PyCLBlast: 'CLBlastXmax' failed: %s" % get_status_message(err))
    return cl.Event.from_int_ptr(<size_t>event)

####################################################################################################
# Index of minimum value in a vector (non-BLAS function): iSMIN/iDMIN/iCMIN/iZMIN/iHMIN
####################################################################################################

cdef extern from "clblast_c.h":
    CLBlastStatusCode CLBlastiSmin(const size_t n, cl_mem imin_buffer, const size_t imin_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastiDmin(const size_t n, cl_mem imin_buffer, const size_t imin_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastiCmin(const size_t n, cl_mem imin_buffer, const size_t imin_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastiZmin(const size_t n, cl_mem imin_buffer, const size_t imin_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_command_queue* queue, cl_event* event)

def min(queue, n, x, imin, x_inc = 1, x_offset = 0, imin_offset = 0):
    """
    xMIN: Index of minimum value in a vector (non-BLAS function)

    Dispatches on the common dtype of `x` and `imin` to CLBlastiSmin/iDmin/iCmin/iZmin. The result
    is written by CLBlast into the buffer of `imin` at `imin_offset`.

    Returns the event produced by CLBlast, wrapped through `cl.Event.from_int_ptr`.
    Raises ValueError for invalid arguments and RuntimeError when CLBlast reports a failure.
    """
    # NOTE(review): the index buffer is forced to share the (floating-point) dtype of `x` and to be
    # two-dimensional; confirm this matches what the CLBlast routine writes.
    dtype = check_dtype([x, imin], ["float32", "float64", "complex64", "complex128"])
    check_vector(x, "x")
    check_matrix(imin, "imin")

    # `int_ptr` is a Python integer holding the raw handle: go through size_t to get a C pointer
    cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr
    cdef cl_mem imin_buffer = <cl_mem><size_t>imin.base_data.int_ptr

    cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
    cdef cl_event event = NULL

    cdef CLBlastStatusCode err
    if dtype == np.dtype("float32"):
        err = CLBlastiSmin(n, imin_buffer, imin_offset, x_buffer, x_offset, x_inc, &command_queue, &event)
    elif dtype == np.dtype("float64"):
        err = CLBlastiDmin(n, imin_buffer, imin_offset, x_buffer, x_offset, x_inc, &command_queue, &event)
    elif dtype == np.dtype("complex64"):
        err = CLBlastiCmin(n, imin_buffer, imin_offset, x_buffer, x_offset, x_inc, &command_queue, &event)
    elif dtype == np.dtype("complex128"):
        err = CLBlastiZmin(n, imin_buffer, imin_offset, x_buffer, x_offset, x_inc, &command_queue, &event)
    else:
        raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
    if err != CLBlastSuccess:
        raise RuntimeError("PyCLBlast: 'CLBlastXmin' failed: %s" % get_status_message(err))
    return cl.Event.from_int_ptr(<size_t>event)

####################################################################################################
# General matrix-vector multiplication: SGEMV/DGEMV/CGEMV/ZGEMV/HGEMV
####################################################################################################

cdef extern from "clblast_c.h":
    CLBlastStatusCode CLBlastSgemv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const size_t m, const size_t n, const float alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const float beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastDgemv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const size_t m, const size_t n, const double alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const double beta, cl_mem y_buffer, const size_t y_offset, const size_t
y_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastCgemv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const size_t m, const size_t n, const cl_float2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_float2 beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastZgemv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const size_t m, const size_t n, const cl_double2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_double2 beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) + +def gemv(queue, m, n, a, x, y, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, beta = 0.0, a_transp = False, a_offset = 0, x_offset = 0, y_offset = 0): + dtype = check_dtype([a, x, y], ["float32", "float64", "complex64", "complex128"]) + check_matrix(a, "a") + check_vector(x, "x") + check_vector(y, "y") + + cdef cl_mem a_buffer = a.base_data.int_ptr + cdef cl_mem x_buffer = x.base_data.int_ptr + cdef cl_mem y_buffer = y.base_data.int_ptr + + cdef cl_command_queue command_queue = queue.int_ptr + cdef cl_event event = NULL + a_transpose = CLBlastTransposeYes if a_transp else CLBlastTransposeNo + + cdef CLBlastStatusCode err + if dtype == np.dtype("float32"): + err = CLBlastSgemv(CLBlastLayoutRowMajor, a_transpose, m, n, alpha, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, beta, y_buffer, y_offset, y_inc, &command_queue, &event) + elif dtype == np.dtype("float64"): + err = CLBlastDgemv(CLBlastLayoutRowMajor, a_transpose, m, n, alpha, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, beta, y_buffer, y_offset, y_inc, &command_queue, &event) + elif dtype == np.dtype("complex64"): + err = 
CLBlastCgemv(CLBlastLayoutRowMajor, a_transpose, m, n, cl_float2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, cl_float2(x=beta.real,y=beta.imag), y_buffer, y_offset, y_inc, &command_queue, &event) + elif dtype == np.dtype("complex128"): + err = CLBlastZgemv(CLBlastLayoutRowMajor, a_transpose, m, n, cl_double2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, cl_double2(x=beta.real,y=beta.imag), y_buffer, y_offset, y_inc, &command_queue, &event) + else: + raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) + if err != CLBlastSuccess: + raise RuntimeError("PyCLBlast: 'CLBlastXgemv' failed: %s" % get_status_message(err)) + return cl.Event.from_int_ptr(event) + +#################################################################################################### +# General banded matrix-vector multiplication: SGBMV/DGBMV/CGBMV/ZGBMV/HGBMV +#################################################################################################### + +cdef extern from "clblast_c.h": + CLBlastStatusCode CLBlastSgbmv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const size_t m, const size_t n, const size_t kl, const size_t ku, const float alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const float beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastDgbmv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const size_t m, const size_t n, const size_t kl, const size_t ku, const double alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const double beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastCgbmv(const CLBlastLayout layout, const 
CLBlastTranspose a_transpose, const size_t m, const size_t n, const size_t kl, const size_t ku, const cl_float2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_float2 beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastZgbmv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const size_t m, const size_t n, const size_t kl, const size_t ku, const cl_double2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_double2 beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) + +def gbmv(queue, m, n, kl, ku, a, x, y, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, beta = 0.0, a_transp = False, a_offset = 0, x_offset = 0, y_offset = 0): + dtype = check_dtype([a, x, y], ["float32", "float64", "complex64", "complex128"]) + check_matrix(a, "a") + check_vector(x, "x") + check_vector(y, "y") + + cdef cl_mem a_buffer = a.base_data.int_ptr + cdef cl_mem x_buffer = x.base_data.int_ptr + cdef cl_mem y_buffer = y.base_data.int_ptr + + cdef cl_command_queue command_queue = queue.int_ptr + cdef cl_event event = NULL + a_transpose = CLBlastTransposeYes if a_transp else CLBlastTransposeNo + + cdef CLBlastStatusCode err + if dtype == np.dtype("float32"): + err = CLBlastSgbmv(CLBlastLayoutRowMajor, a_transpose, m, n, kl, ku, alpha, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, beta, y_buffer, y_offset, y_inc, &command_queue, &event) + elif dtype == np.dtype("float64"): + err = CLBlastDgbmv(CLBlastLayoutRowMajor, a_transpose, m, n, kl, ku, alpha, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, beta, y_buffer, y_offset, y_inc, &command_queue, &event) + elif dtype == np.dtype("complex64"): + err = CLBlastCgbmv(CLBlastLayoutRowMajor, a_transpose, m, n, 
kl, ku, cl_float2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, cl_float2(x=beta.real,y=beta.imag), y_buffer, y_offset, y_inc, &command_queue, &event) + elif dtype == np.dtype("complex128"): + err = CLBlastZgbmv(CLBlastLayoutRowMajor, a_transpose, m, n, kl, ku, cl_double2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, cl_double2(x=beta.real,y=beta.imag), y_buffer, y_offset, y_inc, &command_queue, &event) + else: + raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) + if err != CLBlastSuccess: + raise RuntimeError("PyCLBlast: 'CLBlastXgbmv' failed: %s" % get_status_message(err)) + return cl.Event.from_int_ptr(event) + +#################################################################################################### +# Hermitian matrix-vector multiplication: CHEMV/ZHEMV +#################################################################################################### + +cdef extern from "clblast_c.h": + CLBlastStatusCode CLBlastChemv(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const cl_float2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_float2 beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastZhemv(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const cl_double2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_double2 beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) + +def hemv(queue, n, a, x, y, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, beta = 0.0, lower_triangle = False, a_offset = 0, x_offset = 0, y_offset = 0): + dtype = check_dtype([a, x, y], ["complex64", "complex128"]) + 
check_matrix(a, "a") + check_vector(x, "x") + check_vector(y, "y") + + cdef cl_mem a_buffer = a.base_data.int_ptr + cdef cl_mem x_buffer = x.base_data.int_ptr + cdef cl_mem y_buffer = y.base_data.int_ptr + + cdef cl_command_queue command_queue = queue.int_ptr + cdef cl_event event = NULL + triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper + + cdef CLBlastStatusCode err + if dtype == np.dtype("complex64"): + err = CLBlastChemv(CLBlastLayoutRowMajor, triangle, n, cl_float2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, cl_float2(x=beta.real,y=beta.imag), y_buffer, y_offset, y_inc, &command_queue, &event) + elif dtype == np.dtype("complex128"): + err = CLBlastZhemv(CLBlastLayoutRowMajor, triangle, n, cl_double2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, cl_double2(x=beta.real,y=beta.imag), y_buffer, y_offset, y_inc, &command_queue, &event) + else: + raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) + if err != CLBlastSuccess: + raise RuntimeError("PyCLBlast: 'CLBlastXhemv' failed: %s" % get_status_message(err)) + return cl.Event.from_int_ptr(event) + +#################################################################################################### +# Hermitian banded matrix-vector multiplication: CHBMV/ZHBMV +#################################################################################################### + +cdef extern from "clblast_c.h": + CLBlastStatusCode CLBlastChbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const size_t k, const cl_float2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_float2 beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastZhbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const 
size_t k, const cl_double2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_double2 beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) + +def hbmv(queue, n, k, a, x, y, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, beta = 0.0, lower_triangle = False, a_offset = 0, x_offset = 0, y_offset = 0): + dtype = check_dtype([a, x, y], ["complex64", "complex128"]) + check_matrix(a, "a") + check_vector(x, "x") + check_vector(y, "y") + + cdef cl_mem a_buffer = a.base_data.int_ptr + cdef cl_mem x_buffer = x.base_data.int_ptr + cdef cl_mem y_buffer = y.base_data.int_ptr + + cdef cl_command_queue command_queue = queue.int_ptr + cdef cl_event event = NULL + triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper + + cdef CLBlastStatusCode err + if dtype == np.dtype("complex64"): + err = CLBlastChbmv(CLBlastLayoutRowMajor, triangle, n, k, cl_float2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, cl_float2(x=beta.real,y=beta.imag), y_buffer, y_offset, y_inc, &command_queue, &event) + elif dtype == np.dtype("complex128"): + err = CLBlastZhbmv(CLBlastLayoutRowMajor, triangle, n, k, cl_double2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, cl_double2(x=beta.real,y=beta.imag), y_buffer, y_offset, y_inc, &command_queue, &event) + else: + raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) + if err != CLBlastSuccess: + raise RuntimeError("PyCLBlast: 'CLBlastXhbmv' failed: %s" % get_status_message(err)) + return cl.Event.from_int_ptr(event) + +#################################################################################################### +# Hermitian packed matrix-vector multiplication: CHPMV/ZHPMV +#################################################################################################### + +cdef extern from "clblast_c.h": + 
CLBlastStatusCode CLBlastChpmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const cl_float2 alpha, const cl_mem ap_buffer, const size_t ap_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_float2 beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastZhpmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const cl_double2 alpha, const cl_mem ap_buffer, const size_t ap_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_double2 beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) + +def hpmv(queue, n, ap, x, y, ap_ld, x_inc = 1, y_inc = 1, alpha = 1.0, beta = 0.0, lower_triangle = False, ap_offset = 0, x_offset = 0, y_offset = 0): + dtype = check_dtype([ap, x, y], ["complex64", "complex128"]) + check_matrix(ap, "ap") + check_vector(x, "x") + check_vector(y, "y") + + cdef cl_mem ap_buffer = ap.base_data.int_ptr + cdef cl_mem x_buffer = x.base_data.int_ptr + cdef cl_mem y_buffer = y.base_data.int_ptr + + cdef cl_command_queue command_queue = queue.int_ptr + cdef cl_event event = NULL + triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper + + cdef CLBlastStatusCode err + if dtype == np.dtype("complex64"): + err = CLBlastChpmv(CLBlastLayoutRowMajor, triangle, n, cl_float2(x=alpha.real,y=alpha.imag), ap_buffer, ap_offset, x_buffer, x_offset, x_inc, cl_float2(x=beta.real,y=beta.imag), y_buffer, y_offset, y_inc, &command_queue, &event) + elif dtype == np.dtype("complex128"): + err = CLBlastZhpmv(CLBlastLayoutRowMajor, triangle, n, cl_double2(x=alpha.real,y=alpha.imag), ap_buffer, ap_offset, x_buffer, x_offset, x_inc, cl_double2(x=beta.real,y=beta.imag), y_buffer, y_offset, y_inc, &command_queue, &event) + else: + raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) + if err != 
CLBlastSuccess: + raise RuntimeError("PyCLBlast: 'CLBlastXhpmv' failed: %s" % get_status_message(err)) + return cl.Event.from_int_ptr(event) + +#################################################################################################### +# Symmetric matrix-vector multiplication: SSYMV/DSYMV/HSYMV +#################################################################################################### + +cdef extern from "clblast_c.h": + CLBlastStatusCode CLBlastSsymv(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const float alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const float beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastDsymv(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const double alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const double beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) + +def symv(queue, n, a, x, y, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, beta = 0.0, lower_triangle = False, a_offset = 0, x_offset = 0, y_offset = 0): + dtype = check_dtype([a, x, y], ["float32", "float64"]) + check_matrix(a, "a") + check_vector(x, "x") + check_vector(y, "y") + + cdef cl_mem a_buffer = a.base_data.int_ptr + cdef cl_mem x_buffer = x.base_data.int_ptr + cdef cl_mem y_buffer = y.base_data.int_ptr + + cdef cl_command_queue command_queue = queue.int_ptr + cdef cl_event event = NULL + triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper + + cdef CLBlastStatusCode err + if dtype == np.dtype("float32"): + err = CLBlastSsymv(CLBlastLayoutRowMajor, triangle, n, alpha, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, beta, y_buffer, y_offset, y_inc, 
&command_queue, &event) + elif dtype == np.dtype("float64"): + err = CLBlastDsymv(CLBlastLayoutRowMajor, triangle, n, alpha, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, beta, y_buffer, y_offset, y_inc, &command_queue, &event) + else: + raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) + if err != CLBlastSuccess: + raise RuntimeError("PyCLBlast: 'CLBlastXsymv' failed: %s" % get_status_message(err)) + return cl.Event.from_int_ptr(event) + +#################################################################################################### +# Symmetric banded matrix-vector multiplication: SSBMV/DSBMV/HSBMV +#################################################################################################### + +cdef extern from "clblast_c.h": + CLBlastStatusCode CLBlastSsbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const size_t k, const float alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const float beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastDsbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const size_t k, const double alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const double beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) + +def sbmv(queue, n, k, a, x, y, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, beta = 0.0, lower_triangle = False, a_offset = 0, x_offset = 0, y_offset = 0): + dtype = check_dtype([a, x, y], ["float32", "float64"]) + check_matrix(a, "a") + check_vector(x, "x") + check_vector(y, "y") + + cdef cl_mem a_buffer = a.base_data.int_ptr + cdef cl_mem x_buffer = x.base_data.int_ptr + cdef cl_mem y_buffer = y.base_data.int_ptr + + cdef 
cl_command_queue command_queue = queue.int_ptr + cdef cl_event event = NULL + triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper + + cdef CLBlastStatusCode err + if dtype == np.dtype("float32"): + err = CLBlastSsbmv(CLBlastLayoutRowMajor, triangle, n, k, alpha, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, beta, y_buffer, y_offset, y_inc, &command_queue, &event) + elif dtype == np.dtype("float64"): + err = CLBlastDsbmv(CLBlastLayoutRowMajor, triangle, n, k, alpha, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, beta, y_buffer, y_offset, y_inc, &command_queue, &event) + else: + raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) + if err != CLBlastSuccess: + raise RuntimeError("PyCLBlast: 'CLBlastXsbmv' failed: %s" % get_status_message(err)) + return cl.Event.from_int_ptr(event) + +#################################################################################################### +# Symmetric packed matrix-vector multiplication: SSPMV/DSPMV/HSPMV +#################################################################################################### + +cdef extern from "clblast_c.h": + CLBlastStatusCode CLBlastSspmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const float alpha, const cl_mem ap_buffer, const size_t ap_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const float beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastDspmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const double alpha, const cl_mem ap_buffer, const size_t ap_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const double beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) + +def spmv(queue, n, ap, x, y, ap_ld, x_inc = 1, y_inc = 1, alpha = 1.0, beta = 0.0, lower_triangle = False, ap_offset = 0, 
x_offset = 0, y_offset = 0): + dtype = check_dtype([ap, x, y], ["float32", "float64"]) + check_matrix(ap, "ap") + check_vector(x, "x") + check_vector(y, "y") + + cdef cl_mem ap_buffer = ap.base_data.int_ptr + cdef cl_mem x_buffer = x.base_data.int_ptr + cdef cl_mem y_buffer = y.base_data.int_ptr + + cdef cl_command_queue command_queue = queue.int_ptr + cdef cl_event event = NULL + triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper + + cdef CLBlastStatusCode err + if dtype == np.dtype("float32"): + err = CLBlastSspmv(CLBlastLayoutRowMajor, triangle, n, alpha, ap_buffer, ap_offset, x_buffer, x_offset, x_inc, beta, y_buffer, y_offset, y_inc, &command_queue, &event) + elif dtype == np.dtype("float64"): + err = CLBlastDspmv(CLBlastLayoutRowMajor, triangle, n, alpha, ap_buffer, ap_offset, x_buffer, x_offset, x_inc, beta, y_buffer, y_offset, y_inc, &command_queue, &event) + else: + raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) + if err != CLBlastSuccess: + raise RuntimeError("PyCLBlast: 'CLBlastXspmv' failed: %s" % get_status_message(err)) + return cl.Event.from_int_ptr(event) + +#################################################################################################### +# Triangular matrix-vector multiplication: STRMV/DTRMV/CTRMV/ZTRMV/HTRMV +#################################################################################################### + +cdef extern from "clblast_c.h": + CLBlastStatusCode CLBlastStrmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastDtrmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const cl_mem a_buffer, const 
size_t a_offset, const size_t a_ld, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastCtrmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastZtrmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) + +def trmv(queue, n, a, x, a_ld, x_inc = 1, lower_triangle = False, a_transp = False, unit_diagonal = False, a_offset = 0, x_offset = 0): + dtype = check_dtype([a, x], ["float32", "float64", "complex64", "complex128"]) + check_matrix(a, "a") + check_vector(x, "x") + + cdef cl_mem a_buffer = a.base_data.int_ptr + cdef cl_mem x_buffer = x.base_data.int_ptr + + cdef cl_command_queue command_queue = queue.int_ptr + cdef cl_event event = NULL + triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper + a_transpose = CLBlastTransposeYes if a_transp else CLBlastTransposeNo + diagonal = CLBlastDiagonalUnit if unit_diagonal else CLBlastDiagonalNonUnit + + cdef CLBlastStatusCode err + if dtype == np.dtype("float32"): + err = CLBlastStrmv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, &command_queue, &event) + elif dtype == np.dtype("float64"): + err = CLBlastDtrmv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, &command_queue, &event) + elif dtype == np.dtype("complex64"): + err = CLBlastCtrmv(CLBlastLayoutRowMajor, 
triangle, a_transpose, diagonal, n, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, &command_queue, &event) + elif dtype == np.dtype("complex128"): + err = CLBlastZtrmv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, &command_queue, &event) + else: + raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) + if err != CLBlastSuccess: + raise RuntimeError("PyCLBlast: 'CLBlastXtrmv' failed: %s" % get_status_message(err)) + return cl.Event.from_int_ptr(event) + +#################################################################################################### +# Triangular banded matrix-vector multiplication: STBMV/DTBMV/CTBMV/ZTBMV/HTBMV +#################################################################################################### + +cdef extern from "clblast_c.h": + CLBlastStatusCode CLBlastStbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const size_t k, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastDtbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const size_t k, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastCtbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const size_t k, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastZtbmv(const CLBlastLayout layout, 
const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const size_t k, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) + +def tbmv(queue, n, k, a, x, a_ld, x_inc = 1, lower_triangle = False, a_transp = False, unit_diagonal = False, a_offset = 0, x_offset = 0): + dtype = check_dtype([a, x], ["float32", "float64", "complex64", "complex128"]) + check_matrix(a, "a") + check_vector(x, "x") + + cdef cl_mem a_buffer = a.base_data.int_ptr + cdef cl_mem x_buffer = x.base_data.int_ptr + + cdef cl_command_queue command_queue = queue.int_ptr + cdef cl_event event = NULL + triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper + a_transpose = CLBlastTransposeYes if a_transp else CLBlastTransposeNo + diagonal = CLBlastDiagonalUnit if unit_diagonal else CLBlastDiagonalNonUnit + + cdef CLBlastStatusCode err + if dtype == np.dtype("float32"): + err = CLBlastStbmv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n, k, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, &command_queue, &event) + elif dtype == np.dtype("float64"): + err = CLBlastDtbmv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n, k, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, &command_queue, &event) + elif dtype == np.dtype("complex64"): + err = CLBlastCtbmv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n, k, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, &command_queue, &event) + elif dtype == np.dtype("complex128"): + err = CLBlastZtbmv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n, k, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, &command_queue, &event) + else: + raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) + if err != CLBlastSuccess: + raise RuntimeError("PyCLBlast: 'CLBlastXtbmv' failed: %s" % get_status_message(err)) + return 
cl.Event.from_int_ptr(event) + +#################################################################################################### +# Triangular packed matrix-vector multiplication: STPMV/DTPMV/CTPMV/ZTPMV/HTPMV +#################################################################################################### + +cdef extern from "clblast_c.h": + CLBlastStatusCode CLBlastStpmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const cl_mem ap_buffer, const size_t ap_offset, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastDtpmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const cl_mem ap_buffer, const size_t ap_offset, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastCtpmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const cl_mem ap_buffer, const size_t ap_offset, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastZtpmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const cl_mem ap_buffer, const size_t ap_offset, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) + +def tpmv(queue, n, ap, x, ap_ld, x_inc = 1, lower_triangle = False, a_transp = False, unit_diagonal = False, ap_offset = 0, x_offset = 0): + dtype = check_dtype([ap, x], ["float32", "float64", "complex64", "complex128"]) + check_matrix(ap, "ap") + check_vector(x, "x") + + cdef cl_mem ap_buffer = ap.base_data.int_ptr + cdef cl_mem x_buffer 
= x.base_data.int_ptr + + cdef cl_command_queue command_queue = queue.int_ptr + cdef cl_event event = NULL + triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper + a_transpose = CLBlastTransposeYes if a_transp else CLBlastTransposeNo + diagonal = CLBlastDiagonalUnit if unit_diagonal else CLBlastDiagonalNonUnit + + cdef CLBlastStatusCode err + if dtype == np.dtype("float32"): + err = CLBlastStpmv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n, ap_buffer, ap_offset, x_buffer, x_offset, x_inc, &command_queue, &event) + elif dtype == np.dtype("float64"): + err = CLBlastDtpmv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n, ap_buffer, ap_offset, x_buffer, x_offset, x_inc, &command_queue, &event) + elif dtype == np.dtype("complex64"): + err = CLBlastCtpmv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n, ap_buffer, ap_offset, x_buffer, x_offset, x_inc, &command_queue, &event) + elif dtype == np.dtype("complex128"): + err = CLBlastZtpmv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n, ap_buffer, ap_offset, x_buffer, x_offset, x_inc, &command_queue, &event) + else: + raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) + if err != CLBlastSuccess: + raise RuntimeError("PyCLBlast: 'CLBlastXtpmv' failed: %s" % get_status_message(err)) + return cl.Event.from_int_ptr(event) + +#################################################################################################### +# Solves a triangular system of equations: STRSV/DTRSV/CTRSV/ZTRSV +#################################################################################################### + +cdef extern from "clblast_c.h": + CLBlastStatusCode CLBlastStrsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, 
cl_event* event) + CLBlastStatusCode CLBlastDtrsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastCtrsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastZtrsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) + +def trsv(queue, n, a, x, a_ld, x_inc = 1, lower_triangle = False, a_transp = False, unit_diagonal = False, a_offset = 0, x_offset = 0): + dtype = check_dtype([a, x], ["float32", "float64", "complex64", "complex128"]) + check_matrix(a, "a") + check_vector(x, "x") + + cdef cl_mem a_buffer = a.base_data.int_ptr + cdef cl_mem x_buffer = x.base_data.int_ptr + + cdef cl_command_queue command_queue = queue.int_ptr + cdef cl_event event = NULL + triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper + a_transpose = CLBlastTransposeYes if a_transp else CLBlastTransposeNo + diagonal = CLBlastDiagonalUnit if unit_diagonal else CLBlastDiagonalNonUnit + + cdef CLBlastStatusCode err + if dtype == np.dtype("float32"): + err = CLBlastStrsv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, &command_queue, &event) + elif dtype == np.dtype("float64"): + err = 
CLBlastDtrsv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, &command_queue, &event) + elif dtype == np.dtype("complex64"): + err = CLBlastCtrsv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, &command_queue, &event) + elif dtype == np.dtype("complex128"): + err = CLBlastZtrsv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, &command_queue, &event) + else: + raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) + if err != CLBlastSuccess: + raise RuntimeError("PyCLBlast: 'CLBlastXtrsv' failed: %s" % get_status_message(err)) + return cl.Event.from_int_ptr(event) + +#################################################################################################### +# General rank-1 matrix update: SGER/DGER/HGER +#################################################################################################### + +cdef extern from "clblast_c.h": + CLBlastStatusCode CLBlastSger(const CLBlastLayout layout, const size_t m, const size_t n, const float alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastDger(const CLBlastLayout layout, const size_t m, const size_t n, const double alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld,cl_command_queue* queue, cl_event* event) + +def ger(queue, m, n, x, y, a, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, x_offset = 0, y_offset = 0, a_offset = 0): + dtype = check_dtype([x, y, a], ["float32", "float64"]) + check_vector(x, "x") + check_vector(y, "y") + check_matrix(a, "a") + + cdef 
cl_mem x_buffer = x.base_data.int_ptr + cdef cl_mem y_buffer = y.base_data.int_ptr + cdef cl_mem a_buffer = a.base_data.int_ptr + + cdef cl_command_queue command_queue = queue.int_ptr + cdef cl_event event = NULL + + cdef CLBlastStatusCode err + if dtype == np.dtype("float32"): + err = CLBlastSger(CLBlastLayoutRowMajor, m, n, alpha, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, a_buffer, a_offset, a_ld, &command_queue, &event) + elif dtype == np.dtype("float64"): + err = CLBlastDger(CLBlastLayoutRowMajor, m, n, alpha, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, a_buffer, a_offset, a_ld, &command_queue, &event) + else: + raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) + if err != CLBlastSuccess: + raise RuntimeError("PyCLBlast: 'CLBlastXger' failed: %s" % get_status_message(err)) + return cl.Event.from_int_ptr(event) + +#################################################################################################### +# General rank-1 complex matrix update: CGERU/ZGERU +#################################################################################################### + +cdef extern from "clblast_c.h": + CLBlastStatusCode CLBlastCgeru(const CLBlastLayout layout, const size_t m, const size_t n, const cl_float2 alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastZgeru(const CLBlastLayout layout, const size_t m, const size_t n, const cl_double2 alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld,cl_command_queue* queue, cl_event* event) + +def geru(queue, m, n, x, y, a, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, x_offset = 0, y_offset = 0, a_offset = 0): + dtype = check_dtype([x, 
y, a], ["complex64", "complex128"]) + check_vector(x, "x") + check_vector(y, "y") + check_matrix(a, "a") + + cdef cl_mem x_buffer = x.base_data.int_ptr + cdef cl_mem y_buffer = y.base_data.int_ptr + cdef cl_mem a_buffer = a.base_data.int_ptr + + cdef cl_command_queue command_queue = queue.int_ptr + cdef cl_event event = NULL + + cdef CLBlastStatusCode err + if dtype == np.dtype("complex64"): + err = CLBlastCgeru(CLBlastLayoutRowMajor, m, n, cl_float2(x=alpha.real,y=alpha.imag), x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, a_buffer, a_offset, a_ld, &command_queue, &event) + elif dtype == np.dtype("complex128"): + err = CLBlastZgeru(CLBlastLayoutRowMajor, m, n, cl_double2(x=alpha.real,y=alpha.imag), x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, a_buffer, a_offset, a_ld, &command_queue, &event) + else: + raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) + if err != CLBlastSuccess: + raise RuntimeError("PyCLBlast: 'CLBlastXgeru' failed: %s" % get_status_message(err)) + return cl.Event.from_int_ptr(event) + +#################################################################################################### +# General rank-1 complex conjugated matrix update: CGERC/ZGERC +#################################################################################################### + +cdef extern from "clblast_c.h": + CLBlastStatusCode CLBlastCgerc(const CLBlastLayout layout, const size_t m, const size_t n, const cl_float2 alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastZgerc(const CLBlastLayout layout, const size_t m, const size_t n, const cl_double2 alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem a_buffer, const size_t a_offset, const 
size_t a_ld,cl_command_queue* queue, cl_event* event) + +def gerc(queue, m, n, x, y, a, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, x_offset = 0, y_offset = 0, a_offset = 0): + dtype = check_dtype([x, y, a], ["complex64", "complex128"]) + check_vector(x, "x") + check_vector(y, "y") + check_matrix(a, "a") + + cdef cl_mem x_buffer = x.base_data.int_ptr + cdef cl_mem y_buffer = y.base_data.int_ptr + cdef cl_mem a_buffer = a.base_data.int_ptr + + cdef cl_command_queue command_queue = queue.int_ptr + cdef cl_event event = NULL + + cdef CLBlastStatusCode err + if dtype == np.dtype("complex64"): + err = CLBlastCgerc(CLBlastLayoutRowMajor, m, n, cl_float2(x=alpha.real,y=alpha.imag), x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, a_buffer, a_offset, a_ld, &command_queue, &event) + elif dtype == np.dtype("complex128"): + err = CLBlastZgerc(CLBlastLayoutRowMajor, m, n, cl_double2(x=alpha.real,y=alpha.imag), x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, a_buffer, a_offset, a_ld, &command_queue, &event) + else: + raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) + if err != CLBlastSuccess: + raise RuntimeError("PyCLBlast: 'CLBlastXgerc' failed: %s" % get_status_message(err)) + return cl.Event.from_int_ptr(event) + +#################################################################################################### +# Hermitian rank-1 matrix update: CHER/ZHER +#################################################################################################### + +cdef extern from "clblast_c.h": + CLBlastStatusCode CLBlastCher(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const float alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastZher(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const double alpha, const cl_mem x_buffer, const size_t 
x_offset, const size_t x_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld,cl_command_queue* queue, cl_event* event) + +def her(queue, n, x, a, a_ld, x_inc = 1, alpha = 1.0, lower_triangle = False, x_offset = 0, a_offset = 0): + dtype = check_dtype([x, a], ["complex64", "complex128"]) + check_vector(x, "x") + check_matrix(a, "a") + + cdef cl_mem x_buffer = x.base_data.int_ptr + cdef cl_mem a_buffer = a.base_data.int_ptr + + cdef cl_command_queue command_queue = queue.int_ptr + cdef cl_event event = NULL + triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper + + cdef CLBlastStatusCode err + if dtype == np.dtype("complex64"): + err = CLBlastCher(CLBlastLayoutRowMajor, triangle, n, alpha, x_buffer, x_offset, x_inc, a_buffer, a_offset, a_ld, &command_queue, &event) + elif dtype == np.dtype("complex128"): + err = CLBlastZher(CLBlastLayoutRowMajor, triangle, n, alpha, x_buffer, x_offset, x_inc, a_buffer, a_offset, a_ld, &command_queue, &event) + else: + raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) + if err != CLBlastSuccess: + raise RuntimeError("PyCLBlast: 'CLBlastXher' failed: %s" % get_status_message(err)) + return cl.Event.from_int_ptr(event) + +#################################################################################################### +# Hermitian packed rank-1 matrix update: CHPR/ZHPR +#################################################################################################### + +cdef extern from "clblast_c.h": + CLBlastStatusCode CLBlastChpr(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const float alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem ap_buffer, const size_t ap_offset,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastZhpr(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const double alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem ap_buffer, 
const size_t ap_offset,cl_command_queue* queue, cl_event* event) + +def hpr(queue, n, x, ap, ap_ld, x_inc = 1, alpha = 1.0, lower_triangle = False, x_offset = 0, ap_offset = 0): + dtype = check_dtype([x, ap], ["complex64", "complex128"]) + check_vector(x, "x") + check_matrix(ap, "ap") + + cdef cl_mem x_buffer = x.base_data.int_ptr + cdef cl_mem ap_buffer = ap.base_data.int_ptr + + cdef cl_command_queue command_queue = queue.int_ptr + cdef cl_event event = NULL + triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper + + cdef CLBlastStatusCode err + if dtype == np.dtype("complex64"): + err = CLBlastChpr(CLBlastLayoutRowMajor, triangle, n, alpha, x_buffer, x_offset, x_inc, ap_buffer, ap_offset, &command_queue, &event) + elif dtype == np.dtype("complex128"): + err = CLBlastZhpr(CLBlastLayoutRowMajor, triangle, n, alpha, x_buffer, x_offset, x_inc, ap_buffer, ap_offset, &command_queue, &event) + else: + raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) + if err != CLBlastSuccess: + raise RuntimeError("PyCLBlast: 'CLBlastXhpr' failed: %s" % get_status_message(err)) + return cl.Event.from_int_ptr(event) + +#################################################################################################### +# Hermitian rank-2 matrix update: CHER2/ZHER2 +#################################################################################################### + +cdef extern from "clblast_c.h": + CLBlastStatusCode CLBlastCher2(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const cl_float2 alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastZher2(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const cl_double2 alpha, const cl_mem x_buffer, const size_t x_offset, const size_t 
x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld,cl_command_queue* queue, cl_event* event) + +def her2(queue, n, x, y, a, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, lower_triangle = False, x_offset = 0, y_offset = 0, a_offset = 0): + dtype = check_dtype([x, y, a], ["complex64", "complex128"]) + check_vector(x, "x") + check_vector(y, "y") + check_matrix(a, "a") + + cdef cl_mem x_buffer = x.base_data.int_ptr + cdef cl_mem y_buffer = y.base_data.int_ptr + cdef cl_mem a_buffer = a.base_data.int_ptr + + cdef cl_command_queue command_queue = queue.int_ptr + cdef cl_event event = NULL + triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper + + cdef CLBlastStatusCode err + if dtype == np.dtype("complex64"): + err = CLBlastCher2(CLBlastLayoutRowMajor, triangle, n, cl_float2(x=alpha.real,y=alpha.imag), x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, a_buffer, a_offset, a_ld, &command_queue, &event) + elif dtype == np.dtype("complex128"): + err = CLBlastZher2(CLBlastLayoutRowMajor, triangle, n, cl_double2(x=alpha.real,y=alpha.imag), x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, a_buffer, a_offset, a_ld, &command_queue, &event) + else: + raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) + if err != CLBlastSuccess: + raise RuntimeError("PyCLBlast: 'CLBlastXher2' failed: %s" % get_status_message(err)) + return cl.Event.from_int_ptr(event) + +#################################################################################################### +# Hermitian packed rank-2 matrix update: CHPR2/ZHPR2 +#################################################################################################### + +cdef extern from "clblast_c.h": + CLBlastStatusCode CLBlastChpr2(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const cl_float2 alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, 
const size_t y_offset, const size_t y_inc, cl_mem ap_buffer, const size_t ap_offset,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastZhpr2(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const cl_double2 alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem ap_buffer, const size_t ap_offset,cl_command_queue* queue, cl_event* event) + +def hpr2(queue, n, x, y, ap, ap_ld, x_inc = 1, y_inc = 1, alpha = 1.0, lower_triangle = False, x_offset = 0, y_offset = 0, ap_offset = 0): + dtype = check_dtype([x, y, ap], ["complex64", "complex128"]) + check_vector(x, "x") + check_vector(y, "y") + check_matrix(ap, "ap") + + cdef cl_mem x_buffer = x.base_data.int_ptr + cdef cl_mem y_buffer = y.base_data.int_ptr + cdef cl_mem ap_buffer = ap.base_data.int_ptr + + cdef cl_command_queue command_queue = queue.int_ptr + cdef cl_event event = NULL + triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper + + cdef CLBlastStatusCode err + if dtype == np.dtype("complex64"): + err = CLBlastChpr2(CLBlastLayoutRowMajor, triangle, n, cl_float2(x=alpha.real,y=alpha.imag), x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, ap_buffer, ap_offset, &command_queue, &event) + elif dtype == np.dtype("complex128"): + err = CLBlastZhpr2(CLBlastLayoutRowMajor, triangle, n, cl_double2(x=alpha.real,y=alpha.imag), x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, ap_buffer, ap_offset, &command_queue, &event) + else: + raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) + if err != CLBlastSuccess: + raise RuntimeError("PyCLBlast: 'CLBlastXhpr2' failed: %s" % get_status_message(err)) + return cl.Event.from_int_ptr(event) + +#################################################################################################### +# Symmetric rank-1 matrix update: SSYR/DSYR/HSYR 
####################################################################################################

cdef extern from "clblast_c.h":
    CLBlastStatusCode CLBlastSsyr(const CLBlastLayout layout, const CLBlastTriangle triangle,
                                  const size_t n, const float alpha,
                                  const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
                                  cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
                                  cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastDsyr(const CLBlastLayout layout, const CLBlastTriangle triangle,
                                  const size_t n, const double alpha,
                                  const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
                                  cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
                                  cl_command_queue* queue, cl_event* event)

def syr(queue, n, x, a, a_ld, x_inc = 1, alpha = 1.0, lower_triangle = False,
        x_offset = 0, a_offset = 0):
    """
    xSYR: symmetric rank-1 matrix update, dispatched on the common dtype of `x` and `a`.

    Calls CLBlastSsyr for float32 and CLBlastDsyr for float64, always with row-major layout.
    Only these two precisions are bound here.
    NOTE(review): in BLAS terms this is presumably A := alpha * x * x^T + A; confirm against
    the CLBlast API documentation.

    Args:
        queue: object exposing the raw command-queue handle as `int_ptr`
            (presumably a pyopencl CommandQueue; verify against callers).
        n: forwarded to the C routine as `n` (size_t).
        x: vector argument; validated with check_vector, buffer taken from `x.base_data`.
        a: matrix argument; validated with check_matrix, buffer taken from `a.base_data`.
        a_ld: forwarded as the leading dimension of `a` (size_t).
        x_inc: forwarded as the increment of `x` (size_t).
        alpha: scalar forwarded as float or double depending on the dtype.
        lower_triangle: selects CLBlastTriangleLower when true, CLBlastTriangleUpper otherwise.
        x_offset, a_offset: forwarded unchanged as buffer offsets (size_t).

    Returns:
        The `cl.Event` built from the event handle filled in by the C call.

    Raises:
        ValueError: the dtype returned by check_dtype is neither float32 nor float64.
        RuntimeError: the C routine returned a status other than CLBlastSuccess.
    """
    dtype = check_dtype([x, a], ["float32", "float64"])
    check_vector(x, "x")
    check_matrix(a, "a")

    # `int_ptr` is a Python integer holding the raw handle. Cython does not convert Python
    # objects to handle types implicitly, so cast explicitly via size_t.
    cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr
    cdef cl_mem a_buffer = <cl_mem><size_t>a.base_data.int_ptr

    cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
    cdef cl_event event = NULL
    triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper

    cdef CLBlastStatusCode err
    if dtype == np.dtype("float32"):
        err = CLBlastSsyr(CLBlastLayoutRowMajor, triangle, n, alpha,
                          x_buffer, x_offset, x_inc,
                          a_buffer, a_offset, a_ld,
                          &command_queue, &event)
    elif dtype == np.dtype("float64"):
        err = CLBlastDsyr(CLBlastLayoutRowMajor, triangle, n, alpha,
                          x_buffer, x_offset, x_inc,
                          a_buffer, a_offset, a_ld,
                          &command_queue, &event)
    else:
        raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
    if err != CLBlastSuccess:
        raise RuntimeError("PyCLBlast: 'CLBlastXsyr' failed: %s" % get_status_message(err))
    # Hand the handle back to Python as an integer; a C handle cannot be passed as an object.
    return cl.Event.from_int_ptr(<size_t>event)

####################################################################################################
# Symmetric packed rank-1 matrix update: SSPR/DSPR/HSPR
####################################################################################################

cdef extern from "clblast_c.h":
    CLBlastStatusCode CLBlastSspr(const CLBlastLayout layout, const CLBlastTriangle triangle,
                                  const size_t n, const float alpha,
                                  const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
                                  cl_mem ap_buffer, const size_t ap_offset,
                                  cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastDspr(const CLBlastLayout layout, const CLBlastTriangle triangle,
                                  const size_t n, const double alpha,
                                  const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
                                  cl_mem ap_buffer, const size_t ap_offset,
                                  cl_command_queue* queue, cl_event* event)

def spr(queue, n, x, ap, ap_ld, x_inc = 1, alpha = 1.0, lower_triangle = False,
        x_offset = 0, ap_offset = 0):
    """
    xSPR: symmetric packed rank-1 matrix update, dispatched on the common dtype of `x` and `ap`.

    Calls CLBlastSspr for float32 and CLBlastDspr for float64, always with row-major layout.
    Only these two precisions are bound here.

    Args:
        queue: object exposing the raw command-queue handle as `int_ptr`
            (presumably a pyopencl CommandQueue; verify against callers).
        n: forwarded to the C routine as `n` (size_t).
        x: vector argument; validated with check_vector, buffer taken from `x.base_data`.
        ap: packed-matrix argument; validated with check_matrix, buffer taken from
            `ap.base_data`.
        ap_ld: accepted for interface compatibility but not used; the C routines declared
            above take no leading-dimension argument.
        x_inc: forwarded as the increment of `x` (size_t).
        alpha: scalar forwarded as float or double depending on the dtype.
        lower_triangle: selects CLBlastTriangleLower when true, CLBlastTriangleUpper otherwise.
        x_offset, ap_offset: forwarded unchanged as buffer offsets (size_t).

    Returns:
        The `cl.Event` built from the event handle filled in by the C call.

    Raises:
        ValueError: the dtype returned by check_dtype is neither float32 nor float64.
        RuntimeError: the C routine returned a status other than CLBlastSuccess.
    """
    dtype = check_dtype([x, ap], ["float32", "float64"])
    check_vector(x, "x")
    check_matrix(ap, "ap")

    # `int_ptr` is a Python integer holding the raw handle. Cython does not convert Python
    # objects to handle types implicitly, so cast explicitly via size_t.
    cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr
    cdef cl_mem ap_buffer = <cl_mem><size_t>ap.base_data.int_ptr

    cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
    cdef cl_event event = NULL
    triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper

    cdef CLBlastStatusCode err
    if dtype == np.dtype("float32"):
        err = CLBlastSspr(CLBlastLayoutRowMajor, triangle, n, alpha,
                          x_buffer, x_offset, x_inc,
                          ap_buffer, ap_offset,
                          &command_queue, &event)
    elif dtype == np.dtype("float64"):
        err = CLBlastDspr(CLBlastLayoutRowMajor, triangle, n, alpha,
                          x_buffer, x_offset, x_inc,
                          ap_buffer, ap_offset,
                          &command_queue, &event)
    else:
        raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
    if err != CLBlastSuccess:
        raise RuntimeError("PyCLBlast: 'CLBlastXspr' failed: %s" % get_status_message(err))
    # Hand the handle back to Python as an integer; a C handle cannot be passed as an object.
    return cl.Event.from_int_ptr(<size_t>event)

####################################################################################################
# Symmetric rank-2 matrix update: SSYR2/DSYR2/HSYR2
+#################################################################################################### + +cdef extern from "clblast_c.h": + CLBlastStatusCode CLBlastSsyr2(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const float alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastDsyr2(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const double alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld,cl_command_queue* queue, cl_event* event) + +def syr2(queue, n, x, y, a, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, lower_triangle = False, x_offset = 0, y_offset = 0, a_offset = 0): + dtype = check_dtype([x, y, a], ["float32", "float64"]) + check_vector(x, "x") + check_vector(y, "y") + check_matrix(a, "a") + + cdef cl_mem x_buffer = x.base_data.int_ptr + cdef cl_mem y_buffer = y.base_data.int_ptr + cdef cl_mem a_buffer = a.base_data.int_ptr + + cdef cl_command_queue command_queue = queue.int_ptr + cdef cl_event event = NULL + triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper + + cdef CLBlastStatusCode err + if dtype == np.dtype("float32"): + err = CLBlastSsyr2(CLBlastLayoutRowMajor, triangle, n, alpha, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, a_buffer, a_offset, a_ld, &command_queue, &event) + elif dtype == np.dtype("float64"): + err = CLBlastDsyr2(CLBlastLayoutRowMajor, triangle, n, alpha, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, a_buffer, a_offset, a_ld, &command_queue, &event) + else: + raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) + if err != CLBlastSuccess: + raise RuntimeError("PyCLBlast: 
'CLBlastXsyr2' failed: %s" % get_status_message(err)) + return cl.Event.from_int_ptr(event) +
####################################################################################################
# Symmetric packed rank-2 matrix update: SSPR2/DSPR2/HSPR2
####################################################################################################

cdef extern from "clblast_c.h":
    CLBlastStatusCode CLBlastSspr2(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const float alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem ap_buffer, const size_t ap_offset,cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastDspr2(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const double alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem ap_buffer, const size_t ap_offset,cl_command_queue* queue, cl_event* event)

def spr2(queue, n, x, y, ap, ap_ld, x_inc = 1, y_inc = 1, alpha = 1.0, lower_triangle = False, x_offset = 0, y_offset = 0, ap_offset = 0):
    """
    xSPR2: Symmetric packed rank-2 matrix update.

    Dispatches to CLBlastSspr2 or CLBlastDspr2 depending on the common dtype of `x`, `y` and
    `ap` (float32 or float64); the layout passed to CLBlast is always row-major.
    `lower_triangle` selects CLBlastTriangleLower instead of CLBlastTriangleUpper.

    NOTE: `ap_ld` is accepted but not forwarded; the CLBlast packed routines declared above
    take no leading dimension for `ap`.

    Returns a pyopencl Event built from the event handle written by the CLBlast call.
    Raises ValueError for an unrecognized dtype and RuntimeError when the CLBlast status is
    not CLBlastSuccess.
    """
    dtype = check_dtype([x, y, ap], ["float32", "float64"])
    check_vector(x, "x")
    check_vector(y, "y")
    check_matrix(ap, "ap")

    # `int_ptr` is a Python integer: go through size_t to obtain the raw OpenCL handles.
    cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr
    cdef cl_mem y_buffer = <cl_mem><size_t>y.base_data.int_ptr
    cdef cl_mem ap_buffer = <cl_mem><size_t>ap.base_data.int_ptr

    cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
    cdef cl_event event = NULL
    triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper

    cdef CLBlastStatusCode err
    if dtype == np.dtype("float32"):
        err = CLBlastSspr2(CLBlastLayoutRowMajor, triangle, n, alpha, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, ap_buffer, ap_offset, &command_queue, &event)
    elif dtype == np.dtype("float64"):
        err = CLBlastDspr2(CLBlastLayoutRowMajor, triangle, n, alpha, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, ap_buffer, ap_offset, &command_queue, &event)
    else:
        raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
    if err != CLBlastSuccess:
        raise RuntimeError("PyCLBlast: 'CLBlastXspr2' failed: %s" % get_status_message(err))
    # The event handle is a pointer: hand it to pyopencl as an integer.
    return cl.Event.from_int_ptr(<size_t>event)

####################################################################################################
# General matrix-matrix multiplication: SGEMM/DGEMM/CGEMM/ZGEMM/HGEMM
####################################################################################################

cdef extern from "clblast_c.h":
    CLBlastStatusCode CLBlastSgemm(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, const size_t m, const size_t n, const size_t k, const float alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const float beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastDgemm(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, const size_t m, const size_t n, const size_t k, const double alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const double beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastCgemm(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, const size_t m, const size_t n, const size_t k, const cl_float2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const cl_float2 beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastZgemm(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, const size_t m, const size_t n, const size_t k, const cl_double2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const cl_double2 beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event)

def gemm(queue, m, n, k, a, b, c, a_ld, b_ld, c_ld, alpha = 1.0, beta = 0.0, a_transp = False, b_transp = False, a_offset = 0, b_offset = 0, c_offset = 0):
    """
    xGEMM: General matrix-matrix multiplication.

    Dispatches to CLBlastSgemm/Dgemm/Cgemm/Zgemm depending on the common dtype of `a`, `b`
    and `c` (float32, float64, complex64 or complex128); the layout passed to CLBlast is
    always row-major. `a_transp` / `b_transp` select CLBlastTransposeYes for the respective
    matrix. For the complex variants `alpha` and `beta` are split into `.real` / `.imag`.

    Returns a pyopencl Event built from the event handle written by the CLBlast call.
    Raises ValueError for an unrecognized dtype and RuntimeError when the CLBlast status is
    not CLBlastSuccess.
    """
    dtype = check_dtype([a, b, c], ["float32", "float64", "complex64", "complex128"])
    check_matrix(a, "a")
    check_matrix(b, "b")
    check_matrix(c, "c")

    # `int_ptr` is a Python integer: go through size_t to obtain the raw OpenCL handles.
    cdef cl_mem a_buffer = <cl_mem><size_t>a.base_data.int_ptr
    cdef cl_mem b_buffer = <cl_mem><size_t>b.base_data.int_ptr
    cdef cl_mem c_buffer = <cl_mem><size_t>c.base_data.int_ptr

    cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
    cdef cl_event event = NULL
    a_transpose = CLBlastTransposeYes if a_transp else CLBlastTransposeNo
    b_transpose = CLBlastTransposeYes if b_transp else CLBlastTransposeNo

    cdef CLBlastStatusCode err
    if dtype == np.dtype("float32"):
        err = CLBlastSgemm(CLBlastLayoutRowMajor, a_transpose, b_transpose, m, n, k, alpha, a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, beta, c_buffer, c_offset, c_ld, &command_queue, &event)
    elif dtype == np.dtype("float64"):
        err = CLBlastDgemm(CLBlastLayoutRowMajor, a_transpose, b_transpose, m, n, k, alpha, a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, beta, c_buffer, c_offset, c_ld, &command_queue, &event)
    elif dtype == np.dtype("complex64"):
        err = CLBlastCgemm(CLBlastLayoutRowMajor, a_transpose, b_transpose, m, n, k, cl_float2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, cl_float2(x=beta.real,y=beta.imag), c_buffer, c_offset, c_ld, &command_queue, &event)
    elif dtype == np.dtype("complex128"):
        err = CLBlastZgemm(CLBlastLayoutRowMajor, a_transpose, b_transpose, m, n, k, cl_double2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, cl_double2(x=beta.real,y=beta.imag), c_buffer, c_offset, c_ld, &command_queue, &event)
    else:
        raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
    if err != CLBlastSuccess:
        raise RuntimeError("PyCLBlast: 'CLBlastXgemm' failed: %s" % get_status_message(err))
    # The event handle is a pointer: hand it to pyopencl as an integer.
    return cl.Event.from_int_ptr(<size_t>event)

####################################################################################################
# Symmetric matrix-matrix multiplication: SSYMM/DSYMM/CSYMM/ZSYMM/HSYMM
####################################################################################################

cdef extern from "clblast_c.h":
    CLBlastStatusCode CLBlastSsymm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const size_t m, const size_t n, const float alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const float beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastDsymm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const size_t m, const size_t n, const double alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const double beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastCsymm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const size_t m, const size_t n, const cl_float2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const cl_float2 beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastZsymm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const size_t m, const size_t n, const cl_double2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const cl_double2 beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event)

def symm(queue, m, n, a, b, c, a_ld, b_ld, c_ld, alpha = 1.0, beta = 0.0, right_side = False, lower_triangle = False, a_offset = 0, b_offset = 0, c_offset = 0):
    """
    xSYMM: Symmetric matrix-matrix multiplication.

    Dispatches to CLBlastSsymm/Dsymm/Csymm/Zsymm depending on the common dtype of `a`, `b`
    and `c` (float32, float64, complex64 or complex128); the layout passed to CLBlast is
    always row-major. `right_side` selects CLBlastSideRight instead of CLBlastSideLeft and
    `lower_triangle` selects CLBlastTriangleLower instead of CLBlastTriangleUpper. For the
    complex variants `alpha` and `beta` are split into `.real` / `.imag`.

    Returns a pyopencl Event built from the event handle written by the CLBlast call.
    Raises ValueError for an unrecognized dtype and RuntimeError when the CLBlast status is
    not CLBlastSuccess.
    """
    dtype = check_dtype([a, b, c], ["float32", "float64", "complex64", "complex128"])
    check_matrix(a, "a")
    check_matrix(b, "b")
    check_matrix(c, "c")

    # `int_ptr` is a Python integer: go through size_t to obtain the raw OpenCL handles.
    cdef cl_mem a_buffer = <cl_mem><size_t>a.base_data.int_ptr
    cdef cl_mem b_buffer = <cl_mem><size_t>b.base_data.int_ptr
    cdef cl_mem c_buffer = <cl_mem><size_t>c.base_data.int_ptr

    cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
    cdef cl_event event = NULL
    side = CLBlastSideRight if right_side else CLBlastSideLeft
    triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper

    cdef CLBlastStatusCode err
    if dtype == np.dtype("float32"):
        err = CLBlastSsymm(CLBlastLayoutRowMajor, side, triangle, m, n, alpha, a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, beta, c_buffer, c_offset, c_ld, &command_queue, &event)
    elif dtype == np.dtype("float64"):
        err = CLBlastDsymm(CLBlastLayoutRowMajor, side, triangle, m, n, alpha, a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, beta, c_buffer, c_offset, c_ld, &command_queue, &event)
    elif dtype == np.dtype("complex64"):
        err = CLBlastCsymm(CLBlastLayoutRowMajor, side, triangle, m, n, cl_float2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, cl_float2(x=beta.real,y=beta.imag), c_buffer, c_offset, c_ld, &command_queue, &event)
    elif dtype == np.dtype("complex128"):
        err = CLBlastZsymm(CLBlastLayoutRowMajor, side, triangle, m, n, cl_double2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, cl_double2(x=beta.real,y=beta.imag), c_buffer, c_offset, c_ld, &command_queue, &event)
    else:
        raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
    if err != CLBlastSuccess:
        raise RuntimeError("PyCLBlast: 'CLBlastXsymm' failed: %s" % get_status_message(err))
    # The event handle is a pointer: hand it to pyopencl as an integer.
    return cl.Event.from_int_ptr(<size_t>event)

####################################################################################################
# Hermitian matrix-matrix multiplication: CHEMM/ZHEMM
####################################################################################################

cdef extern from "clblast_c.h":
    CLBlastStatusCode CLBlastChemm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const size_t m, const size_t n, const cl_float2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const cl_float2 beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastZhemm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const size_t m, const size_t n, const cl_double2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const cl_double2 beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event)

def hemm(queue, m, n, a, b, c, a_ld, b_ld, c_ld, alpha = 1.0, beta = 0.0, right_side = False, lower_triangle = False, a_offset = 0, b_offset = 0, c_offset = 0):
    """
    xHEMM: Hermitian matrix-matrix multiplication.

    Dispatches to CLBlastChemm or CLBlastZhemm depending on the common dtype of `a`, `b` and
    `c` (complex64 or complex128); the layout passed to CLBlast is always row-major.
    `right_side` selects CLBlastSideRight instead of CLBlastSideLeft and `lower_triangle`
    selects CLBlastTriangleLower instead of CLBlastTriangleUpper. `alpha` and `beta` are
    split into `.real` / `.imag`.

    Returns a pyopencl Event built from the event handle written by the CLBlast call.
    Raises ValueError for an unrecognized dtype and RuntimeError when the CLBlast status is
    not CLBlastSuccess.
    """
    dtype = check_dtype([a, b, c], ["complex64", "complex128"])
    check_matrix(a, "a")
    check_matrix(b, "b")
    check_matrix(c, "c")

    # `int_ptr` is a Python integer: go through size_t to obtain the raw OpenCL handles.
    cdef cl_mem a_buffer = <cl_mem><size_t>a.base_data.int_ptr
    cdef cl_mem b_buffer = <cl_mem><size_t>b.base_data.int_ptr
    cdef cl_mem c_buffer = <cl_mem><size_t>c.base_data.int_ptr

    cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
    cdef cl_event event = NULL
    side = CLBlastSideRight if right_side else CLBlastSideLeft
    triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper

    cdef CLBlastStatusCode err
    if dtype == np.dtype("complex64"):
        err = CLBlastChemm(CLBlastLayoutRowMajor, side, triangle, m, n, cl_float2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, cl_float2(x=beta.real,y=beta.imag), c_buffer, c_offset, c_ld, &command_queue, &event)
    elif dtype == np.dtype("complex128"):
        err = CLBlastZhemm(CLBlastLayoutRowMajor, side, triangle, m, n, cl_double2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, cl_double2(x=beta.real,y=beta.imag), c_buffer, c_offset, c_ld, &command_queue, &event)
    else:
        raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
    if err != CLBlastSuccess:
        raise RuntimeError("PyCLBlast: 'CLBlastXhemm' failed: %s" % get_status_message(err))
    # The event handle is a pointer: hand it to pyopencl as an integer.
    return cl.Event.from_int_ptr(<size_t>event)

####################################################################################################
# Rank-K update of a symmetric matrix: SSYRK/DSYRK/CSYRK/ZSYRK/HSYRK
####################################################################################################

cdef extern from "clblast_c.h":
    CLBlastStatusCode CLBlastSsyrk(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const size_t n, const size_t k, const float alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const float beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastDsyrk(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const size_t n, const size_t k, const double alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const double beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastCsyrk(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const size_t n, const size_t k, const cl_float2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_float2 beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastZsyrk(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const size_t n, const size_t k, const cl_double2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_double2 beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event)

def syrk(queue, n, k, a, c, a_ld, c_ld, alpha = 1.0, beta = 0.0, lower_triangle = False, a_transp = False, a_offset = 0, c_offset = 0):
    """
    xSYRK: Rank-K update of a symmetric matrix.

    Dispatches to CLBlastSsyrk/Dsyrk/Csyrk/Zsyrk depending on the common dtype of `a` and
    `c` (float32, float64, complex64 or complex128); the layout passed to CLBlast is always
    row-major. `lower_triangle` selects CLBlastTriangleLower instead of CLBlastTriangleUpper
    and `a_transp` selects CLBlastTransposeYes. For the complex variants `alpha` and `beta`
    are split into `.real` / `.imag`.

    Returns a pyopencl Event built from the event handle written by the CLBlast call.
    Raises ValueError for an unrecognized dtype and RuntimeError when the CLBlast status is
    not CLBlastSuccess.
    """
    dtype = check_dtype([a, c], ["float32", "float64", "complex64", "complex128"])
    check_matrix(a, "a")
    check_matrix(c, "c")

    # `int_ptr` is a Python integer: go through size_t to obtain the raw OpenCL handles.
    cdef cl_mem a_buffer = <cl_mem><size_t>a.base_data.int_ptr
    cdef cl_mem c_buffer = <cl_mem><size_t>c.base_data.int_ptr

    cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
    cdef cl_event event = NULL
    triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper
    a_transpose = CLBlastTransposeYes if a_transp else CLBlastTransposeNo

    cdef CLBlastStatusCode err
    if dtype == np.dtype("float32"):
        err = CLBlastSsyrk(CLBlastLayoutRowMajor, triangle, a_transpose, n, k, alpha, a_buffer, a_offset, a_ld, beta, c_buffer, c_offset, c_ld, &command_queue, &event)
    elif dtype == np.dtype("float64"):
        err = CLBlastDsyrk(CLBlastLayoutRowMajor, triangle, a_transpose, n, k, alpha, a_buffer, a_offset, a_ld, beta, c_buffer, c_offset, c_ld, &command_queue, &event)
    elif dtype == np.dtype("complex64"):
        err = CLBlastCsyrk(CLBlastLayoutRowMajor, triangle, a_transpose, n, k, cl_float2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, cl_float2(x=beta.real,y=beta.imag), c_buffer, c_offset, c_ld, &command_queue, &event)
    elif dtype == np.dtype("complex128"):
        err = CLBlastZsyrk(CLBlastLayoutRowMajor, triangle, a_transpose, n, k, cl_double2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, cl_double2(x=beta.real,y=beta.imag), c_buffer, c_offset, c_ld, &command_queue, &event)
    else:
        raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
    if err != CLBlastSuccess:
        raise RuntimeError("PyCLBlast: 'CLBlastXsyrk' failed: %s" % get_status_message(err))
    # The event handle is a pointer: hand it to pyopencl as an integer.
    return cl.Event.from_int_ptr(<size_t>event)

####################################################################################################
# Rank-K update of a hermitian matrix: CHERK/ZHERK
####################################################################################################

cdef extern from "clblast_c.h":
    CLBlastStatusCode CLBlastCherk(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const size_t n, const size_t k, const float alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const float beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastZherk(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const size_t n, const size_t k, const double alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const double beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event)

def herk(queue, n, k, a, c, a_ld, c_ld, alpha = 1.0, beta = 0.0, lower_triangle = False, a_transp = False, a_offset = 0, c_offset = 0):
    """
    xHERK: Rank-K update of a hermitian matrix.

    Dispatches to CLBlastCherk or CLBlastZherk depending on the common dtype of `a` and `c`
    (complex64 or complex128); the layout passed to CLBlast is always row-major.
    `lower_triangle` selects CLBlastTriangleLower instead of CLBlastTriangleUpper and
    `a_transp` selects CLBlastTransposeYes. Unlike the other complex routines, `alpha` and
    `beta` are passed through as real scalars (the C declarations take float/double).

    Returns a pyopencl Event built from the event handle written by the CLBlast call.
    Raises ValueError for an unrecognized dtype and RuntimeError when the CLBlast status is
    not CLBlastSuccess.
    """
    dtype = check_dtype([a, c], ["complex64", "complex128"])
    check_matrix(a, "a")
    check_matrix(c, "c")

    # `int_ptr` is a Python integer: go through size_t to obtain the raw OpenCL handles.
    cdef cl_mem a_buffer = <cl_mem><size_t>a.base_data.int_ptr
    cdef cl_mem c_buffer = <cl_mem><size_t>c.base_data.int_ptr

    cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
    cdef cl_event event = NULL
    triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper
    a_transpose = CLBlastTransposeYes if a_transp else CLBlastTransposeNo

    cdef CLBlastStatusCode err
    if dtype == np.dtype("complex64"):
        err = CLBlastCherk(CLBlastLayoutRowMajor, triangle, a_transpose, n, k, alpha, a_buffer, a_offset, a_ld, beta, c_buffer, c_offset, c_ld, &command_queue, &event)
    elif dtype == np.dtype("complex128"):
        err = CLBlastZherk(CLBlastLayoutRowMajor, triangle, a_transpose, n, k, alpha, a_buffer, a_offset, a_ld, beta, c_buffer, c_offset, c_ld, &command_queue, &event)
    else:
        raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
    if err != CLBlastSuccess:
        raise RuntimeError("PyCLBlast: 'CLBlastXherk' failed: %s" % get_status_message(err))
    # The event handle is a pointer: hand it to pyopencl as an integer.
    return cl.Event.from_int_ptr(<size_t>event)

####################################################################################################
# Rank-2K update of a symmetric matrix: SSYR2K/DSYR2K/CSYR2K/ZSYR2K/HSYR2K
####################################################################################################

cdef extern from "clblast_c.h":
    CLBlastStatusCode CLBlastSsyr2k(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose ab_transpose, const size_t n, const size_t k, const float alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const float beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastDsyr2k(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose ab_transpose, const size_t n, const size_t k, const double alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const double beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastCsyr2k(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose ab_transpose, const size_t n, const size_t k, const cl_float2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const cl_float2 beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastZsyr2k(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose ab_transpose, const size_t n, const size_t k, const cl_double2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const cl_double2 beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event)

def syr2k(queue, n, k, a, b, c, a_ld, b_ld, c_ld, alpha = 1.0, beta = 0.0, lower_triangle = False, ab_transp = False, a_offset = 0, b_offset = 0, c_offset = 0):
    """
    xSYR2K: Rank-2K update of a symmetric matrix.

    Dispatches to CLBlastSsyr2k/Dsyr2k/Csyr2k/Zsyr2k depending on the common dtype of `a`,
    `b` and `c` (float32, float64, complex64 or complex128); the layout passed to CLBlast is
    always row-major. `lower_triangle` selects CLBlastTriangleLower instead of
    CLBlastTriangleUpper and `ab_transp` selects CLBlastTransposeYes. For the complex
    variants `alpha` and `beta` are split into `.real` / `.imag`.

    Returns a pyopencl Event built from the event handle written by the CLBlast call.
    Raises ValueError for an unrecognized dtype and RuntimeError when the CLBlast status is
    not CLBlastSuccess.
    """
    dtype = check_dtype([a, b, c], ["float32", "float64", "complex64", "complex128"])
    check_matrix(a, "a")
    check_matrix(b, "b")
    check_matrix(c, "c")

    # `int_ptr` is a Python integer: go through size_t to obtain the raw OpenCL handles.
    cdef cl_mem a_buffer = <cl_mem><size_t>a.base_data.int_ptr
    cdef cl_mem b_buffer = <cl_mem><size_t>b.base_data.int_ptr
    cdef cl_mem c_buffer = <cl_mem><size_t>c.base_data.int_ptr

    cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
    cdef cl_event event = NULL
    triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper
    ab_transpose = CLBlastTransposeYes if ab_transp else CLBlastTransposeNo

    cdef CLBlastStatusCode err
    if dtype == np.dtype("float32"):
        err = CLBlastSsyr2k(CLBlastLayoutRowMajor, triangle, ab_transpose, n, k, alpha, a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, beta, c_buffer, c_offset, c_ld, &command_queue, &event)
    elif dtype == np.dtype("float64"):
        err = CLBlastDsyr2k(CLBlastLayoutRowMajor, triangle, ab_transpose, n, k, alpha, a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, beta, c_buffer, c_offset, c_ld, &command_queue, &event)
    elif dtype == np.dtype("complex64"):
        err = CLBlastCsyr2k(CLBlastLayoutRowMajor, triangle, ab_transpose, n, k, cl_float2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, cl_float2(x=beta.real,y=beta.imag), c_buffer, c_offset, c_ld, &command_queue, &event)
    elif dtype == np.dtype("complex128"):
        err = CLBlastZsyr2k(CLBlastLayoutRowMajor, triangle, ab_transpose, n, k, cl_double2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, cl_double2(x=beta.real,y=beta.imag), c_buffer, c_offset, c_ld, &command_queue, &event)
    else:
        raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
    if err != CLBlastSuccess:
        raise RuntimeError("PyCLBlast: 'CLBlastXsyr2k' failed: %s" % get_status_message(err))
    # The event handle is a pointer: hand it to pyopencl as an integer.
    return cl.Event.from_int_ptr(<size_t>event)

####################################################################################################
# Rank-2K update of a hermitian matrix: CHER2K/ZHER2K
####################################################################################################

cdef extern from "clblast_c.h":
    CLBlastStatusCode CLBlastCher2k(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose ab_transpose, const size_t n, const size_t k, const cl_float2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const float beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastZher2k(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose ab_transpose, const size_t n, const size_t k, const cl_double2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const double beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event)

def her2k(queue, n, k, a, b, c, a_ld, b_ld, c_ld, alpha = 1.0, beta = 0.0, lower_triangle = False, ab_transp = False, a_offset = 0, b_offset = 0, c_offset = 0):
    """
    xHER2K: Rank-2K update of a hermitian matrix.

    Dispatches to CLBlastCher2k or CLBlastZher2k depending on the common dtype of `a`, `b`
    and `c` (complex64 or complex128); the layout passed to CLBlast is always row-major.
    `lower_triangle` selects CLBlastTriangleLower instead of CLBlastTriangleUpper and
    `ab_transp` selects CLBlastTransposeYes. `alpha` is split into `.real` / `.imag`, whereas
    `beta` is passed through as a real scalar (the C declarations take float/double).

    Returns a pyopencl Event built from the event handle written by the CLBlast call.
    Raises ValueError for an unrecognized dtype and RuntimeError when the CLBlast status is
    not CLBlastSuccess.
    """
    dtype = check_dtype([a, b, c], ["complex64", "complex128"])
    check_matrix(a, "a")
    check_matrix(b, "b")
    check_matrix(c, "c")

    # `int_ptr` is a Python integer: go through size_t to obtain the raw OpenCL handles.
    cdef cl_mem a_buffer = <cl_mem><size_t>a.base_data.int_ptr
    cdef cl_mem b_buffer = <cl_mem><size_t>b.base_data.int_ptr
    cdef cl_mem c_buffer = <cl_mem><size_t>c.base_data.int_ptr

    cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
    cdef cl_event event = NULL
    triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper
    ab_transpose = CLBlastTransposeYes if ab_transp else CLBlastTransposeNo

    cdef CLBlastStatusCode err
    if dtype == np.dtype("complex64"):
        err = CLBlastCher2k(CLBlastLayoutRowMajor, triangle, ab_transpose, n, k, cl_float2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, beta, c_buffer, c_offset, c_ld, &command_queue, &event)
    elif dtype == np.dtype("complex128"):
        err = CLBlastZher2k(CLBlastLayoutRowMajor, triangle, ab_transpose, n, k, cl_double2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, beta, c_buffer, c_offset, c_ld, &command_queue, &event)
    else:
        raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
    if err != CLBlastSuccess:
        raise RuntimeError("PyCLBlast: 'CLBlastXher2k' failed: %s" % get_status_message(err))
    # The event handle is a pointer: hand it to pyopencl as an integer.
    return cl.Event.from_int_ptr(<size_t>event)

####################################################################################################
# Triangular matrix-matrix multiplication: STRMM/DTRMM/CTRMM/ZTRMM/HTRMM
####################################################################################################

cdef extern from "clblast_c.h":
    CLBlastStatusCode CLBlastStrmm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t m, const size_t n, const float alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem b_buffer, const size_t b_offset, const size_t b_ld,cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastDtrmm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t m, const size_t n, const double alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem b_buffer, const size_t b_offset, const size_t b_ld,cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastCtrmm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t m, const size_t n, const cl_float2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem b_buffer, const size_t b_offset, const size_t b_ld,cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastZtrmm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t m, const size_t n, const cl_double2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem b_buffer, const size_t b_offset, const size_t b_ld,cl_command_queue* queue, cl_event* event)

def trmm(queue, m, n, a, b, a_ld, b_ld, alpha = 1.0, right_side = False, lower_triangle = False, a_transp = False, unit_diagonal = False, a_offset = 0, b_offset = 0):
    """
    xTRMM: Triangular matrix-matrix multiplication.

    Dispatches to CLBlastStrmm/Dtrmm/Ctrmm/Ztrmm depending on the common dtype of `a` and
    `b` (float32, float64, complex64 or complex128); the layout passed to CLBlast is always
    row-major. The boolean flags map to CLBlast enums: `right_side` -> CLBlastSideRight,
    `lower_triangle` -> CLBlastTriangleLower, `a_transp` -> CLBlastTransposeYes and
    `unit_diagonal` -> CLBlastDiagonalUnit. For the complex variants `alpha` is split into
    `.real` / `.imag`.

    Returns a pyopencl Event built from the event handle written by the CLBlast call.
    Raises ValueError for an unrecognized dtype and RuntimeError when the CLBlast status is
    not CLBlastSuccess.
    """
    dtype = check_dtype([a, b], ["float32", "float64", "complex64", "complex128"])
    check_matrix(a, "a")
    check_matrix(b, "b")

    # `int_ptr` is a Python integer: go through size_t to obtain the raw OpenCL handles.
    cdef cl_mem a_buffer = <cl_mem><size_t>a.base_data.int_ptr
    cdef cl_mem b_buffer = <cl_mem><size_t>b.base_data.int_ptr

    cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
    cdef cl_event event = NULL
    side = CLBlastSideRight if right_side else CLBlastSideLeft
    triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper
    a_transpose = CLBlastTransposeYes if a_transp else CLBlastTransposeNo
    diagonal = CLBlastDiagonalUnit if unit_diagonal else CLBlastDiagonalNonUnit

    cdef CLBlastStatusCode err
    if dtype == np.dtype("float32"):
        err = CLBlastStrmm(CLBlastLayoutRowMajor, side, triangle, a_transpose, diagonal, m, n, alpha, a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, &command_queue, &event)
    elif dtype == np.dtype("float64"):
        err = CLBlastDtrmm(CLBlastLayoutRowMajor, side, triangle, a_transpose, diagonal, m, n, alpha, a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, &command_queue, &event)
    elif dtype == np.dtype("complex64"):
        err = CLBlastCtrmm(CLBlastLayoutRowMajor, side, triangle, a_transpose, diagonal, m, n, cl_float2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, &command_queue, &event)
    elif dtype == np.dtype("complex128"):
        err = CLBlastZtrmm(CLBlastLayoutRowMajor, side, triangle, a_transpose, diagonal, m, n, cl_double2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, &command_queue, &event)
    else:
        raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
    if err != CLBlastSuccess:
        raise RuntimeError("PyCLBlast: 'CLBlastXtrmm' failed: %s" % get_status_message(err))
    # The event handle is a pointer: hand it to pyopencl as an integer.
    return cl.Event.from_int_ptr(<size_t>event)

####################################################################################################
# Solves a triangular system of equations: STRSM/DTRSM/CTRSM/ZTRSM
####################################################################################################

cdef extern from "clblast_c.h":
    CLBlastStatusCode CLBlastStrsm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t m, const size_t n, const float alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem b_buffer, const size_t b_offset, const size_t b_ld,cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastDtrsm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t m, const size_t n, const double alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem b_buffer, const size_t b_offset, const size_t b_ld,cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastCtrsm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t m, const size_t n, const cl_float2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem b_buffer, const size_t b_offset, const size_t b_ld,cl_command_queue* queue, cl_event* event)
    CLBlastStatusCode CLBlastZtrsm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t m, const size_t n, const cl_double2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem b_buffer, const size_t b_offset, const size_t b_ld,cl_command_queue* queue, cl_event* event)

def trsm(queue, m, n, a, b, a_ld, b_ld, alpha = 1.0, right_side = False, lower_triangle = False, a_transp = False, unit_diagonal = False, a_offset = 0, b_offset = 0):
    """
    xTRSM: Solves a triangular system of equations.

    Dispatches to CLBlastStrsm/Dtrsm/Ctrsm/Ztrsm depending on the common dtype of `a` and
    `b` (float32, float64, complex64 or complex128); the layout passed to CLBlast is always
    row-major. The boolean flags map to CLBlast enums: `right_side` -> CLBlastSideRight,
    `lower_triangle` -> CLBlastTriangleLower, `a_transp` -> CLBlastTransposeYes and
    `unit_diagonal` -> CLBlastDiagonalUnit. For the complex variants `alpha` is split into
    `.real` / `.imag`.

    Returns a pyopencl Event built from the event handle written by the CLBlast call.
    Raises ValueError for an unrecognized dtype and RuntimeError when the CLBlast status is
    not CLBlastSuccess.
    """
    dtype = check_dtype([a, b], ["float32", "float64", "complex64", "complex128"])
    check_matrix(a, "a")
    check_matrix(b, "b")

    # `int_ptr` is a Python integer: go through size_t to obtain the raw OpenCL handles.
    cdef cl_mem a_buffer = <cl_mem><size_t>a.base_data.int_ptr
    cdef cl_mem b_buffer = <cl_mem><size_t>b.base_data.int_ptr

    cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
    cdef cl_event event = NULL
    side = CLBlastSideRight if right_side else CLBlastSideLeft
    triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper
    a_transpose = CLBlastTransposeYes if a_transp else CLBlastTransposeNo
    diagonal = CLBlastDiagonalUnit if unit_diagonal else CLBlastDiagonalNonUnit

    cdef CLBlastStatusCode err
    if dtype == np.dtype("float32"):
        err = CLBlastStrsm(CLBlastLayoutRowMajor, side, triangle, a_transpose, diagonal, m, n, alpha, a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, &command_queue, &event)
    elif dtype == np.dtype("float64"):
        err = CLBlastDtrsm(CLBlastLayoutRowMajor, side, triangle, a_transpose, diagonal, m, n, alpha, a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, &command_queue, &event)
    elif dtype == np.dtype("complex64"):
        err = CLBlastCtrsm(CLBlastLayoutRowMajor, side, triangle, a_transpose, diagonal, m, n, cl_float2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, &command_queue, &event)
    elif dtype == np.dtype("complex128"):
        err = CLBlastZtrsm(CLBlastLayoutRowMajor, side, triangle, a_transpose, diagonal, m, n, cl_double2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, &command_queue, &event)
    else:
        raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
    if err != CLBlastSuccess:
        raise RuntimeError("PyCLBlast: 'CLBlastXtrsm' failed: %s" % get_status_message(err))
    # The event handle is a pointer: hand it to pyopencl as an integer.
    return cl.Event.from_int_ptr(<size_t>event)

####################################################################################################
-- cgit v1.2.3 From c3a3976b7deac175557d74cab4fceb95f2531601 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sun, 18 Feb 2018 18:01:26 +0100 Subject: Updated changelog and roadmap: Python package created --- CHANGELOG | 1 + ROADMAP.md | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG b/CHANGELOG index 13b20d9d..89af84f2 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,5 +1,6 @@ Development (next version) +- Added Python interface to CLBlast 'PyCLBlast' - Added non-BLAS level-1 routines: * SHAD/DHAD/CHAD/ZHAD/HHAD (Hadamard 
element-wise vector-vector product) diff --git a/ROADMAP.md b/ROADMAP.md index df42f75c..df7f6488 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -15,5 +15,5 @@ This file gives an overview of the main features planned for addition to CLBlast | [#95](https://github.com/CNugteren/CLBlast/issues/95) & #237 | Jan '18 | CNugteren | ✔ | Implement strided batch GEMM | | [#224](https://github.com/CNugteren/CLBlast/issues/224) | Jan-Feb '18 | CNugteren | ✔ | Implement Hadamard product (element-wise vector-vector product) | | [#233](https://github.com/CNugteren/CLBlast/issues/233) | Feb '18 | CNugteren | | Add CLBlast to common package managers | -| [#223](https://github.com/CNugteren/CLBlast/issues/223) | Feb '18 | CNugteren | | Python OpenCL interface | +| [#223](https://github.com/CNugteren/CLBlast/issues/223) | Feb '18 | CNugteren | ✔ | Python OpenCL interface | | [#169](https://github.com/CNugteren/CLBlast/issues/169) | ?? | dividiti | | Problem-specific tuning parameter selection | -- cgit v1.2.3 From fc10a4baca150811361a0147cd748008acc3cfca Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sun, 18 Feb 2018 20:19:19 +0100 Subject: Set initial pyclblast to be version 1.0.0 --- .gitignore | 4 +++- src/pyclblast/setup.py | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 8ccab476..b0f39c40 100644 --- a/.gitignore +++ b/.gitignore @@ -4,4 +4,6 @@ stash *.pyc database.json database_best.json -cl.hpp \ No newline at end of file +cl.hpp +src/pyclblast/dist +*.egg-info diff --git a/src/pyclblast/setup.py b/src/pyclblast/setup.py index d5916e06..b6b95d1a 100644 --- a/src/pyclblast/setup.py +++ b/src/pyclblast/setup.py @@ -22,10 +22,10 @@ ext_modules.append( setup( name="pyclblast", - version="1.3.0", + version="1.0.0", author="Cedric Nugteren", author_email="web@cedricnugteren.nl", - url="https://github.com/cnugteren/clblast", + url="https://github.com/CNugteren/CLBlast/blob/master/src/pyclblast", description="Python bindings for 
CLBlast, the tuned OpenCL BLAS library", license="Apache Software License", requires=["numpy", "pyopencl", "cython"], -- cgit v1.2.3