diff options
-rwxr-xr-x | scripts/generator/generator.py | 2 | ||||
-rw-r--r-- | scripts/generator/generator/pyclblast.py | 7 | ||||
-rw-r--r-- | src/pyclblast/README.md | 8 | ||||
-rw-r--r-- | src/pyclblast/samples/haxpy.py | 38 | ||||
-rw-r--r-- | src/pyclblast/setup.py | 2 | ||||
-rw-r--r-- | src/pyclblast/src/pyclblast.pyx | 191 |
6 files changed, 212 insertions, 36 deletions
diff --git a/scripts/generator/generator.py b/scripts/generator/generator.py index 68e3f01a..76c5dc1c 100755 --- a/scripts/generator/generator.py +++ b/scripts/generator/generator.py @@ -49,7 +49,7 @@ FILES = [ "/src/clblast_cuda.cpp", "/src/pyclblast/src/pyclblast.pyx" ] -HEADER_LINES = [124, 21, 128, 24, 29, 45, 29, 66, 40, 96, 21, 290] +HEADER_LINES = [124, 21, 128, 24, 29, 45, 29, 66, 40, 96, 21, 327] FOOTER_LINES = [98, 57, 112, 275, 6, 6, 6, 9, 2, 41, 56, 37] HEADER_LINES_DOC = 0 FOOTER_LINES_DOC = 232 diff --git a/scripts/generator/generator/pyclblast.py b/scripts/generator/generator/pyclblast.py index ab719f5e..47eb2eb4 100644 --- a/scripts/generator/generator/pyclblast.py +++ b/scripts/generator/generator/pyclblast.py @@ -18,6 +18,7 @@ def to_np_dtype(flavour): "D": "float64", "C": "complex64", "Z": "complex128", + "H": "float16", }[flavour.precision_name] @@ -31,6 +32,8 @@ def scalar_cython_conversion(scalar, flavour): return "<cl_float2>cl_float2(x=" + scalar + ".real,y=" + scalar + ".imag)" if scalar_type in ["cl_double2", "double2"]: return "<cl_double2>cl_double2(x=" + scalar + ".real,y=" + scalar + ".imag)" + if scalar_type in ["cl_half", "half"]: + return "<cl_half>" + scalar raise RuntimeError("Could not convert flavour '%s:%s'" % (flavour.precision_name, scalar_type)) @@ -48,7 +51,7 @@ def generate_pyx(routine): result += "cdef extern from \"clblast_c.h\":" + NL np_dtypes = [] for flavour in routine.flavours: - if flavour.precision_name in ["S", "D", "C", "Z"]: + if flavour.precision_name in ["S", "D", "C", "Z", "H"]: result += indent + "CLBlastStatusCode CLBlast" + flavour.name + routine.plain_name() + "(" result += ", ".join(routine.arguments_def_c(flavour)) + "," result += "cl_command_queue* queue, cl_event* event)" + NL @@ -103,7 +106,7 @@ def generate_pyx(routine): result += indent + "cdef CLBlastStatusCode err" + NL if_prefix = "" for flavour in routine.flavours: - if flavour.precision_name in ["S", "D", "C", "Z"]: + if flavour.precision_name in ["S", "D", "C", "Z", "H"]: np_dtype = to_np_dtype(flavour) argument_names = [x. replace("layout", "CLBlastLayoutRowMajor"). diff --git a/src/pyclblast/README.md b/src/pyclblast/README.md index 2f6ebed7..a6a55907 100644 --- a/src/pyclblast/README.md +++ b/src/pyclblast/README.md @@ -30,6 +30,14 @@ After installation OpenCL and CLBlast, simply use pip to install PyCLBlast, e.g. To start using the library, browse the [CLBlast](https://github.com/CNugteren/CLBlast) documentation or check out the PyCLBlast samples provides in the `samples` subfolder. +For developers, first install CLBlast, followed by the Python requirements (e.g. in a Python3 virtualenv): + + pip install Cython numpy pybind11 + pip install pyopencl + +And then compile the library from this location using the `setup.py` file: + + python setup.py install Testing PyCLBlast ------------- diff --git a/src/pyclblast/samples/haxpy.py b/src/pyclblast/samples/haxpy.py new file mode 100644 index 00000000..3db87a34 --- /dev/null +++ b/src/pyclblast/samples/haxpy.py @@ -0,0 +1,38 @@ +#!/usr/bin/env python + +# This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. +# This file follows the PEP8 Python style guide and uses a max-width of 100 characters per line. +# +# Author(s): +# Cedric Nugteren <www.cedricnugteren.nl> + +import numpy as np +import pyopencl as cl +from pyopencl.array import Array +import pyclblast + +# Settings for this sample +dtype = 'float16' +alpha = 1.5 +alpha_fp16 = pyclblast.float32_to_float16(alpha) +n = 4 + +print("# Setting up OpenCL") +ctx = cl.create_some_context() +queue = cl.CommandQueue(ctx) + +print("# Setting up Numpy arrays") +x = np.linspace(1.0, n, num=n).astype(dtype=dtype) +y = np.linspace(1.0, n / 2, num=n).astype(dtype=dtype) + +print("# Setting up OpenCL arrays") +clx = Array(queue, x.shape, x.dtype) +cly = Array(queue, y.shape, y.dtype) +clx.set(x) +cly.set(y) + +print("# Example level-1 operation: AXPY") +pyclblast.axpy(queue, n, clx, cly, alpha=alpha_fp16) +queue.finish() +print("# Result for vector y: %s" % cly.get()) +print("# Expected result: %s" % (alpha * x + y)) diff --git a/src/pyclblast/setup.py b/src/pyclblast/setup.py index 65aeca3a..1c1bf3ab 100644 --- a/src/pyclblast/setup.py +++ b/src/pyclblast/setup.py @@ -22,7 +22,7 @@ ext_modules.append( setup( name="pyclblast", - version="1.1.0", + version="1.2.0", author="Cedric Nugteren", author_email="web@cedricnugteren.nl", url="https://github.com/CNugteren/CLBlast/blob/master/src/pyclblast", diff --git a/src/pyclblast/src/pyclblast.pyx b/src/pyclblast/src/pyclblast.pyx index 22cb680a..14efcf8a 100644 --- a/src/pyclblast/src/pyclblast.pyx +++ b/src/pyclblast/src/pyclblast.pyx @@ -11,6 +11,8 @@ # #################################################################################################### +import binascii +import struct import numpy as np import pyopencl as cl from pyopencl.array import Array @@ -94,6 +96,7 @@ cdef extern from "clblast_c.h": ctypedef struct cl_double2: cl_double x cl_double y + ctypedef unsigned short cl_half # OpenCL special data-types struct _cl_mem: @@ -287,6 +290,40 @@ def check_matrix(a, name): def check_vector(a, name): check_array(a, 1, name) +#################################################################################################### +# Half-precision utility functions +#################################################################################################### + +def float32_to_float16(float32): + # Taken from https://gamedev.stackexchange.com/a/28756 + F16_EXPONENT_BITS = 0x1F + F16_EXPONENT_SHIFT = 10 + F16_EXPONENT_BIAS = 15 + F16_MANTISSA_BITS = 0x3ff + F16_MANTISSA_SHIFT = (23 - F16_EXPONENT_SHIFT) + F16_MAX_EXPONENT = (F16_EXPONENT_BITS << F16_EXPONENT_SHIFT) + + a = struct.pack('>f', float32) + b = binascii.hexlify(a) + + f32 = int(b, 16) + sign = (f32 >> 16) & 0x8000 + exponent = ((f32 >> 23) & 0xff) - 127 + mantissa = f32 & 0x007fffff + + if exponent == 128: + f16 = sign | F16_MAX_EXPONENT + if mantissa: + f16 |= (mantissa & F16_MANTISSA_BITS) + elif exponent > 15: + f16 = sign | F16_MAX_EXPONENT + elif exponent > -15: + exponent += F16_EXPONENT_BIAS + mantissa >>= F16_MANTISSA_SHIFT + f16 = sign | exponent << F16_EXPONENT_SHIFT | mantissa + else: + f16 = sign + return f16 #################################################################################################### # Swap two vectors: SSWAP/DSWAP/CSWAP/ZSWAP/HSWAP @@ -297,13 +334,14 @@ cdef extern from "clblast_c.h": CLBlastStatusCode CLBlastDswap(const size_t n, cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) CLBlastStatusCode CLBlastCswap(const size_t n, cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) CLBlastStatusCode CLBlastZswap(const size_t n, cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastHswap(const size_t n, cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) def swap(queue, n, x, y, x_inc = 1, y_inc = 1, x_offset = 0, y_offset = 0): """ xSWAP: Swap two vectors """ - dtype = check_dtype([x, y], ["float32", "float64", "complex64", "complex128"]) + dtype = check_dtype([x, y], ["float32", "float64", "complex64", "complex128", "float16"]) check_vector(x, "x") check_vector(y, "y") @@ -322,6 +360,8 @@ def swap(queue, n, x, y, x_inc = 1, y_inc = 1, x_offset = 0, y_offset = 0): err = CLBlastCswap(n, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event) elif dtype == np.dtype("complex128"): err = CLBlastZswap(n, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event) + elif dtype == np.dtype("float16"): + err = CLBlastHswap(n, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event) else: raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) if err != CLBlastSuccess: @@ -337,13 +377,14 @@ cdef extern from "clblast_c.h": CLBlastStatusCode CLBlastDscal(const size_t n, const double alpha, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) CLBlastStatusCode CLBlastCscal(const size_t n, const cl_float2 alpha, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) CLBlastStatusCode CLBlastZscal(const size_t n, const cl_double2 alpha, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastHscal(const size_t n, const cl_half alpha, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) def scal(queue, n, x, x_inc = 1, alpha = 1.0, x_offset = 0): """ xSCAL: Vector scaling """ - dtype = check_dtype([x], ["float32", "float64", "complex64", "complex128"]) + dtype = check_dtype([x], ["float32", "float64", "complex64", "complex128", "float16"]) check_vector(x, "x") cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr @@ -360,6 +401,8 @@ def scal(queue, n, x, x_inc = 1, alpha = 1.0, x_offset = 0): err = CLBlastCscal(n, <cl_float2>cl_float2(x=alpha.real,y=alpha.imag), x_buffer, x_offset, x_inc, &command_queue, &event) elif dtype == np.dtype("complex128"): err = CLBlastZscal(n, <cl_double2>cl_double2(x=alpha.real,y=alpha.imag), x_buffer, x_offset, x_inc, &command_queue, &event) + elif dtype == np.dtype("float16"): + err = CLBlastHscal(n, <cl_half>alpha, x_buffer, x_offset, x_inc, &command_queue, &event) else: raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) if err != CLBlastSuccess: @@ -375,13 +418,14 @@ cdef extern from "clblast_c.h": CLBlastStatusCode CLBlastDcopy(const size_t n, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) CLBlastStatusCode CLBlastCcopy(const size_t n, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) CLBlastStatusCode CLBlastZcopy(const size_t n, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastHcopy(const size_t n, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) def copy(queue, n, x, y, x_inc = 1, y_inc = 1, x_offset = 0, y_offset = 0): """ xCOPY: Vector copy """ - dtype = check_dtype([x, y], ["float32", "float64", "complex64", "complex128"]) + dtype = check_dtype([x, y], ["float32", "float64", "complex64", "complex128", "float16"]) check_vector(x, "x") check_vector(y, "y") @@ -400,6 +444,8 @@ def copy(queue, n, x, y, x_inc = 1, y_inc = 1, x_offset = 0, y_offset = 0): err = CLBlastCcopy(n, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event) elif dtype == np.dtype("complex128"): err = CLBlastZcopy(n, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event) + elif dtype == np.dtype("float16"): + err = CLBlastHcopy(n, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event) else: raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) if err != CLBlastSuccess: @@ -415,13 +461,14 @@ cdef extern from "clblast_c.h": CLBlastStatusCode CLBlastDaxpy(const size_t n, const double alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) CLBlastStatusCode CLBlastCaxpy(const size_t n, const cl_float2 alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) CLBlastStatusCode CLBlastZaxpy(const size_t n, const cl_double2 alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastHaxpy(const size_t n, const cl_half alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) def axpy(queue, n, x, y, x_inc = 1, y_inc = 1, alpha = 1.0, x_offset = 0, y_offset = 0): """ xAXPY: Vector-times-constant plus vector """ - dtype = check_dtype([x, y], ["float32", "float64", "complex64", "complex128"]) + dtype = check_dtype([x, y], ["float32", "float64", "complex64", "complex128", "float16"]) check_vector(x, "x") check_vector(y, "y") @@ -440,6 +487,8 @@ def axpy(queue, n, x, y, x_inc = 1, y_inc = 1, alpha = 1.0, x_offset = 0, y_offs err = CLBlastCaxpy(n, <cl_float2>cl_float2(x=alpha.real,y=alpha.imag), x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event) elif dtype == np.dtype("complex128"): err = CLBlastZaxpy(n, <cl_double2>cl_double2(x=alpha.real,y=alpha.imag), x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event) + elif dtype == np.dtype("float16"): + err = CLBlastHaxpy(n, <cl_half>alpha, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event) else: raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) if err != CLBlastSuccess: @@ -453,13 +502,14 @@ def axpy(queue, n, x, y, x_inc = 1, y_inc = 1, alpha = 1.0, x_offset = 0, y_offs cdef extern from "clblast_c.h": CLBlastStatusCode CLBlastSdot(const size_t n, cl_mem dot_buffer, const size_t dot_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) CLBlastStatusCode CLBlastDdot(const size_t n, cl_mem dot_buffer, const size_t dot_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastHdot(const size_t n, cl_mem dot_buffer, const size_t dot_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) def dot(queue, n, x, y, dot, x_inc = 1, y_inc = 1, x_offset = 0, y_offset = 0, dot_offset = 0): """ xDOT: Dot product of two vectors """ - dtype = check_dtype([x, y, dot], ["float32", "float64"]) + dtype = check_dtype([x, y, dot], ["float32", "float64", "float16"]) check_vector(x, "x") check_vector(y, "y") check_matrix(dot, "dot") @@ -476,6 +526,8 @@ def dot(queue, n, x, y, dot, x_inc = 1, y_inc = 1, x_offset = 0, y_offset = 0, d err = CLBlastSdot(n, dot_buffer, dot_offset, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event) elif dtype == np.dtype("float64"): err = CLBlastDdot(n, dot_buffer, dot_offset, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event) + elif dtype == np.dtype("float16"): + err = CLBlastHdot(n, dot_buffer, dot_offset, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event) else: raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) if err != CLBlastSuccess: @@ -563,13 +615,14 @@ cdef extern from "clblast_c.h": CLBlastStatusCode CLBlastDnrm2(const size_t n, cl_mem nrm2_buffer, const size_t nrm2_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) CLBlastStatusCode CLBlastScnrm2(const size_t n, cl_mem nrm2_buffer, const size_t nrm2_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) CLBlastStatusCode CLBlastDznrm2(const size_t n, cl_mem nrm2_buffer, const size_t nrm2_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastHnrm2(const size_t n, cl_mem nrm2_buffer, const size_t nrm2_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) def nrm2(queue, n, x, nrm2, x_inc = 1, x_offset = 0, nrm2_offset = 0): """ xNRM2: Euclidian norm of a vector """ - dtype = check_dtype([x, nrm2], ["float32", "float64", "complex64", "complex128"]) + dtype = check_dtype([x, nrm2], ["float32", "float64", "complex64", "complex128", "float16"]) check_vector(x, "x") check_matrix(nrm2, "nrm2") @@ -588,6 +641,8 @@ def nrm2(queue, n, x, nrm2, x_inc = 1, x_offset = 0, nrm2_offset = 0): err = CLBlastScnrm2(n, nrm2_buffer, nrm2_offset, x_buffer, x_offset, x_inc, &command_queue, &event) elif dtype == np.dtype("complex128"): err = CLBlastDznrm2(n, nrm2_buffer, nrm2_offset, x_buffer, x_offset, x_inc, &command_queue, &event) + elif dtype == np.dtype("float16"): + err = CLBlastHnrm2(n, nrm2_buffer, nrm2_offset, x_buffer, x_offset, x_inc, &command_queue, &event) else: raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) if err != CLBlastSuccess: @@ -603,13 +658,14 @@ cdef extern from "clblast_c.h": CLBlastStatusCode CLBlastDasum(const size_t n, cl_mem asum_buffer, const size_t asum_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) CLBlastStatusCode CLBlastScasum(const size_t n, cl_mem asum_buffer, const size_t asum_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) CLBlastStatusCode CLBlastDzasum(const size_t n, cl_mem asum_buffer, const size_t asum_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastHasum(const size_t n, cl_mem asum_buffer, const size_t asum_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) def asum(queue, n, x, asum, x_inc = 1, x_offset = 0, asum_offset = 0): """ xASUM: Absolute sum of values in a vector """ - dtype = check_dtype([x, asum], ["float32", "float64", "complex64", "complex128"]) + dtype = check_dtype([x, asum], ["float32", "float64", "complex64", "complex128", "float16"]) check_vector(x, "x") check_matrix(asum, "asum") @@ -628,6 +684,8 @@ def asum(queue, n, x, asum, x_inc = 1, x_offset = 0, asum_offset = 0): err = CLBlastScasum(n, asum_buffer, asum_offset, x_buffer, x_offset, x_inc, &command_queue, &event) elif dtype == np.dtype("complex128"): err = CLBlastDzasum(n, asum_buffer, asum_offset, x_buffer, x_offset, x_inc, &command_queue, &event) + elif dtype == np.dtype("float16"): + err = CLBlastHasum(n, asum_buffer, asum_offset, x_buffer, x_offset, x_inc, &command_queue, &event) else: raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) if err != CLBlastSuccess: @@ -643,13 +701,14 @@ cdef extern from "clblast_c.h": CLBlastStatusCode CLBlastDsum(const size_t n, cl_mem sum_buffer, const size_t sum_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) CLBlastStatusCode CLBlastScsum(const size_t n, cl_mem sum_buffer, const size_t sum_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) CLBlastStatusCode CLBlastDzsum(const size_t n, cl_mem sum_buffer, const size_t sum_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastHsum(const size_t n, cl_mem sum_buffer, const size_t sum_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) def sum(queue, n, x, sum, x_inc = 1, x_offset = 0, sum_offset = 0): """ xSUM: Sum of values in a vector (non-BLAS function) """ - dtype = check_dtype([x, sum], ["float32", "float64", "complex64", "complex128"]) + dtype = check_dtype([x, sum], ["float32", "float64", "complex64", "complex128", "float16"]) check_vector(x, "x") check_matrix(sum, "sum") @@ -668,6 +727,8 @@ def sum(queue, n, x, sum, x_inc = 1, x_offset = 0, sum_offset = 0): err = CLBlastScsum(n, sum_buffer, sum_offset, x_buffer, x_offset, x_inc, &command_queue, &event) elif dtype == np.dtype("complex128"): err = CLBlastDzsum(n, sum_buffer, sum_offset, x_buffer, x_offset, x_inc, &command_queue, &event) + elif dtype == np.dtype("float16"): + err = CLBlastHsum(n, sum_buffer, sum_offset, x_buffer, x_offset, x_inc, &command_queue, &event) else: raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) if err != CLBlastSuccess: @@ -683,13 +744,14 @@ cdef extern from "clblast_c.h": CLBlastStatusCode CLBlastiDamax(const size_t n, cl_mem imax_buffer, const size_t imax_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) CLBlastStatusCode CLBlastiCamax(const size_t n, cl_mem imax_buffer, const size_t imax_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) CLBlastStatusCode CLBlastiZamax(const size_t n, cl_mem imax_buffer, const size_t imax_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastiHamax(const size_t n, cl_mem imax_buffer, const size_t imax_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) def amax(queue, n, x, imax, x_inc = 1, x_offset = 0, imax_offset = 0): """ xAMAX: Index of absolute maximum value in a vector """ - dtype = check_dtype([x, imax], ["float32", "float64", "complex64", "complex128"]) + dtype = check_dtype([x, imax], ["float32", "float64", "complex64", "complex128", "float16"]) check_vector(x, "x") check_matrix(imax, "imax") @@ -708,6 +770,8 @@ def amax(queue, n, x, imax, x_inc = 1, x_offset = 0, imax_offset = 0): err = CLBlastiCamax(n, imax_buffer, imax_offset, x_buffer, x_offset, x_inc, &command_queue, &event) elif dtype == np.dtype("complex128"): err = CLBlastiZamax(n, imax_buffer, imax_offset, x_buffer, x_offset, x_inc, &command_queue, &event) + elif dtype == np.dtype("float16"): + err = CLBlastiHamax(n, imax_buffer, imax_offset, x_buffer, x_offset, x_inc, &command_queue, &event) else: raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) if err != CLBlastSuccess: @@ -723,13 +787,14 @@ cdef extern from "clblast_c.h": CLBlastStatusCode CLBlastiDamin(const size_t n, cl_mem imin_buffer, const size_t imin_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) CLBlastStatusCode CLBlastiCamin(const size_t n, cl_mem imin_buffer, const size_t imin_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) CLBlastStatusCode CLBlastiZamin(const size_t n, cl_mem imin_buffer, const size_t imin_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastiHamin(const size_t n, cl_mem imin_buffer, const size_t imin_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) def amin(queue, n, x, imin, x_inc = 1, x_offset = 0, imin_offset = 0): """ xAMIN: Index of absolute minimum value in a vector (non-BLAS function) """ - dtype = check_dtype([x, imin], ["float32", "float64", "complex64", "complex128"]) + dtype = check_dtype([x, imin], ["float32", "float64", "complex64", "complex128", "float16"]) check_vector(x, "x") check_matrix(imin, "imin") @@ -748,6 +813,8 @@ def amin(queue, n, x, imin, x_inc = 1, x_offset = 0, imin_offset = 0): err = CLBlastiCamin(n, imin_buffer, imin_offset, x_buffer, x_offset, x_inc, &command_queue, &event) elif dtype == np.dtype("complex128"): err = CLBlastiZamin(n, imin_buffer, imin_offset, x_buffer, x_offset, x_inc, &command_queue, &event) + elif dtype == np.dtype("float16"): + err = CLBlastiHamin(n, imin_buffer, imin_offset, x_buffer, x_offset, x_inc, &command_queue, &event) else: raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) if err != CLBlastSuccess: @@ -763,13 +830,14 @@ cdef extern from "clblast_c.h": CLBlastStatusCode CLBlastiDmax(const size_t n, cl_mem imax_buffer, const size_t imax_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) CLBlastStatusCode CLBlastiCmax(const size_t n, cl_mem imax_buffer, const size_t imax_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) CLBlastStatusCode CLBlastiZmax(const size_t n, cl_mem imax_buffer, const size_t imax_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastiHmax(const size_t n, cl_mem imax_buffer, const size_t imax_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) def max(queue, n, x, imax, x_inc = 1, x_offset = 0, imax_offset = 0): """ xMAX: Index of maximum value in a vector (non-BLAS function) """ - dtype = check_dtype([x, imax], ["float32", "float64", "complex64", "complex128"]) + dtype = check_dtype([x, imax], ["float32", "float64", "complex64", "complex128", "float16"]) check_vector(x, "x") check_matrix(imax, "imax") @@ -788,6 +856,8 @@ def max(queue, n, x, imax, x_inc = 1, x_offset = 0, imax_offset = 0): err = CLBlastiCmax(n, imax_buffer, imax_offset, x_buffer, x_offset, x_inc, &command_queue, &event) elif dtype == np.dtype("complex128"): err = CLBlastiZmax(n, imax_buffer, imax_offset, x_buffer, x_offset, x_inc, &command_queue, &event) + elif dtype == np.dtype("float16"): + err = CLBlastiHmax(n, imax_buffer, imax_offset, x_buffer, x_offset, x_inc, &command_queue, &event) else: raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) if err != CLBlastSuccess: @@ -803,13 +873,14 @@ cdef extern from "clblast_c.h": CLBlastStatusCode CLBlastiDmin(const size_t n, cl_mem imin_buffer, const size_t imin_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) CLBlastStatusCode CLBlastiCmin(const size_t n, cl_mem imin_buffer, const size_t imin_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) CLBlastStatusCode CLBlastiZmin(const size_t n, cl_mem imin_buffer, const size_t imin_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastiHmin(const size_t n, cl_mem imin_buffer, const size_t imin_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) def min(queue, n, x, imin, x_inc = 1, x_offset = 0, imin_offset = 0): """ xMIN: Index of minimum value in a vector (non-BLAS function) """ - dtype = check_dtype([x, imin], ["float32", "float64", "complex64", "complex128"]) + dtype = check_dtype([x, imin], ["float32", "float64", "complex64", "complex128", "float16"]) check_vector(x, "x") check_matrix(imin, "imin") @@ -828,6 +899,8 @@ def min(queue, n, x, imin, x_inc = 1, x_offset = 0, imin_offset = 0): err = CLBlastiCmin(n, imin_buffer, imin_offset, x_buffer, x_offset, x_inc, &command_queue, &event) elif dtype == np.dtype("complex128"): err = CLBlastiZmin(n, imin_buffer, imin_offset, x_buffer, x_offset, x_inc, &command_queue, &event) + elif dtype == np.dtype("float16"): + err = CLBlastiHmin(n, imin_buffer, imin_offset, x_buffer, x_offset, x_inc, &command_queue, &event) else: raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) if err != CLBlastSuccess: @@ -843,13 +916,14 @@ cdef extern from "clblast_c.h": CLBlastStatusCode CLBlastDgemv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const size_t m, const size_t n, const double alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const double beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) CLBlastStatusCode CLBlastCgemv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const size_t m, const size_t n, const cl_float2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_float2 beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) CLBlastStatusCode CLBlastZgemv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const size_t m, const size_t n, const cl_double2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_double2 beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastHgemv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const size_t m, const size_t n, const cl_half alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_half beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) def gemv(queue, m, n, a, x, y, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, beta = 0.0, a_transp = False, a_offset = 0, x_offset = 0, y_offset = 0): """ xGEMV: General matrix-vector multiplication """ - dtype = check_dtype([a, x, y], ["float32", "float64", "complex64", "complex128"]) + dtype = check_dtype([a, x, y], ["float32", "float64", "complex64", "complex128", "float16"]) check_matrix(a, "a") check_vector(x, "x") check_vector(y, "y") @@ -871,6 +945,8 @@ def gemv(queue, m, n, a, x, y, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, beta = 0 err = CLBlastCgemv(CLBlastLayoutRowMajor, a_transpose, m, n, <cl_float2>cl_float2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, <cl_float2>cl_float2(x=beta.real,y=beta.imag), y_buffer, y_offset, y_inc, &command_queue, &event) elif dtype == np.dtype("complex128"): err = CLBlastZgemv(CLBlastLayoutRowMajor, a_transpose, m, n, <cl_double2>cl_double2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, <cl_double2>cl_double2(x=beta.real,y=beta.imag), y_buffer, y_offset, y_inc, &command_queue, &event) + elif dtype == np.dtype("float16"): + err = CLBlastHgemv(CLBlastLayoutRowMajor, a_transpose, m, n, <cl_half>alpha, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, <cl_half>beta, y_buffer, y_offset, y_inc, &command_queue, &event) else: raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) if err != CLBlastSuccess: @@ -886,13 +962,14 @@ cdef extern from "clblast_c.h": CLBlastStatusCode CLBlastDgbmv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const size_t m, const size_t n, const size_t kl, const size_t ku, const double alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const double beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) CLBlastStatusCode CLBlastCgbmv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const size_t m, const size_t n, const size_t kl, const size_t ku, const cl_float2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_float2 beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) CLBlastStatusCode CLBlastZgbmv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const size_t m, const size_t n, const size_t kl, const size_t ku, const cl_double2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_double2 beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastHgbmv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const size_t m, const size_t n, const size_t kl, const size_t ku, const cl_half alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_half beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) def gbmv(queue, m, n, kl, ku, a, x, y, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, beta = 0.0, a_transp = False, a_offset = 0, x_offset = 0, y_offset = 0): """ xGBMV: General banded matrix-vector multiplication """ - dtype = check_dtype([a, x, y], ["float32", "float64", "complex64", "complex128"]) + dtype = check_dtype([a, x, y], ["float32", "float64", "complex64", "complex128", "float16"]) check_matrix(a, "a") check_vector(x, "x") check_vector(y, "y") @@ -914,6 +991,8 @@ def gbmv(queue, m, n, kl, ku, a, x, y, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, err = CLBlastCgbmv(CLBlastLayoutRowMajor, a_transpose, m, n, kl, ku, <cl_float2>cl_float2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, <cl_float2>cl_float2(x=beta.real,y=beta.imag), y_buffer, y_offset, y_inc, &command_queue, &event) elif dtype == np.dtype("complex128"): err = CLBlastZgbmv(CLBlastLayoutRowMajor, a_transpose, m, n, kl, ku, <cl_double2>cl_double2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, <cl_double2>cl_double2(x=beta.real,y=beta.imag), y_buffer, y_offset, y_inc, &command_queue, &event) + elif dtype == np.dtype("float16"): + err = CLBlastHgbmv(CLBlastLayoutRowMajor, a_transpose, m, n, kl, ku, <cl_half>alpha, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, <cl_half>beta, y_buffer, y_offset, y_inc, &command_queue, &event) else: raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) if err != CLBlastSuccess: @@ -1038,13 +1117,14 @@ def hpmv(queue, n, ap, x, y, ap_ld, x_inc = 1, y_inc = 1, alpha = 1.0, beta = 0. cdef extern from "clblast_c.h": CLBlastStatusCode CLBlastSsymv(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const float alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const float beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) CLBlastStatusCode CLBlastDsymv(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const double alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const double beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastHsymv(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const cl_half alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_half beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) def symv(queue, n, a, x, y, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, beta = 0.0, lower_triangle = False, a_offset = 0, x_offset = 0, y_offset = 0): """ xSYMV: Symmetric matrix-vector multiplication """ - dtype = check_dtype([a, x, y], ["float32", "float64"]) + dtype = check_dtype([a, x, y], ["float32", "float64", "float16"]) check_matrix(a, "a") check_vector(x, "x") check_vector(y, "y") @@ -1062,6 +1142,8 @@ def symv(queue, n, a, x, y, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, beta = 0.0, err = CLBlastSsymv(CLBlastLayoutRowMajor, triangle, n, <cl_float>alpha, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, <cl_float>beta, y_buffer, y_offset, y_inc, &command_queue, &event) elif dtype == np.dtype("float64"): err = CLBlastDsymv(CLBlastLayoutRowMajor, triangle, n, <cl_double>alpha, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, <cl_double>beta, y_buffer, y_offset, y_inc, &command_queue, &event) + elif dtype == np.dtype("float16"): + err = CLBlastHsymv(CLBlastLayoutRowMajor, triangle, n, <cl_half>alpha, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, <cl_half>beta, y_buffer, y_offset, y_inc, &command_queue, &event) else: raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) if err != CLBlastSuccess: @@ -1075,13 +1157,14 @@ def symv(queue, n, a, x, y, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, beta = 0.0, cdef extern from "clblast_c.h": CLBlastStatusCode CLBlastSsbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const size_t k, const float alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const float beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) CLBlastStatusCode CLBlastDsbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const size_t k, const double alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const double beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastHsbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const size_t k, const cl_half alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_half beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) def sbmv(queue, n, k, a, x, y, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, beta = 0.0, lower_triangle = False, a_offset = 0, x_offset = 0, y_offset = 0): """ xSBMV: Symmetric banded matrix-vector multiplication """ - dtype = check_dtype([a, x, y], ["float32", "float64"]) + dtype = check_dtype([a, x, y], ["float32", "float64", "float16"]) check_matrix(a, "a") check_vector(x, "x") check_vector(y, "y") @@ -1099,6 +1182,8 @@ def sbmv(queue, n, k, a, x, y, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, beta = 0 err = CLBlastSsbmv(CLBlastLayoutRowMajor, triangle, n, k, <cl_float>alpha, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, <cl_float>beta, y_buffer, y_offset, y_inc, &command_queue, &event) elif dtype == np.dtype("float64"): err = CLBlastDsbmv(CLBlastLayoutRowMajor, triangle, n, k, <cl_double>alpha, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, <cl_double>beta, y_buffer, y_offset, y_inc, &command_queue, &event) + elif dtype == np.dtype("float16"): + err = CLBlastHsbmv(CLBlastLayoutRowMajor, triangle, n, k, <cl_half>alpha, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, <cl_half>beta, y_buffer, y_offset, y_inc, &command_queue, &event) else: raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) if err != CLBlastSuccess: @@ -1112,13 +1197,14 @@ def sbmv(queue, n, k, a, x, y, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, beta = 0 cdef extern from "clblast_c.h": CLBlastStatusCode CLBlastSspmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const float alpha, const cl_mem ap_buffer, const size_t ap_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const float beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) CLBlastStatusCode CLBlastDspmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const double alpha, const cl_mem ap_buffer, const size_t ap_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const double beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastHspmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const cl_half alpha, const cl_mem ap_buffer, const size_t ap_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_half beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event) def spmv(queue, n, ap, x, y, ap_ld, x_inc = 1, y_inc = 1, alpha = 1.0, beta = 0.0, lower_triangle = False, ap_offset = 0, x_offset = 0, y_offset = 0): """ xSPMV: Symmetric packed matrix-vector multiplication """ - dtype = check_dtype([ap, x, y], ["float32", "float64"]) + dtype = check_dtype([ap, x, y], ["float32", "float64", "float16"]) check_matrix(ap, "ap") check_vector(x, "x") check_vector(y, "y") @@ -1136,6 +1222,8 @@ def spmv(queue, n, ap, x, y, ap_ld, x_inc = 1, y_inc = 1, alpha = 1.0, beta = 0. err = CLBlastSspmv(CLBlastLayoutRowMajor, triangle, n, <cl_float>alpha, ap_buffer, ap_offset, x_buffer, x_offset, x_inc, <cl_float>beta, y_buffer, y_offset, y_inc, &command_queue, &event) elif dtype == np.dtype("float64"): err = CLBlastDspmv(CLBlastLayoutRowMajor, triangle, n, <cl_double>alpha, ap_buffer, ap_offset, x_buffer, x_offset, x_inc, <cl_double>beta, y_buffer, y_offset, y_inc, &command_queue, &event) + elif dtype == np.dtype("float16"): + err = CLBlastHspmv(CLBlastLayoutRowMajor, triangle, n, <cl_half>alpha, ap_buffer, ap_offset, x_buffer, x_offset, x_inc, <cl_half>beta, y_buffer, y_offset, y_inc, &command_queue, &event) else: raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) if err != CLBlastSuccess: @@ -1151,13 +1239,14 @@ cdef extern from "clblast_c.h": CLBlastStatusCode CLBlastDtrmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) CLBlastStatusCode CLBlastCtrmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) CLBlastStatusCode CLBlastZtrmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastHtrmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) def trmv(queue, n, a, x, a_ld, x_inc = 1, lower_triangle = False, a_transp = False, unit_diagonal = False, a_offset = 0, x_offset = 0): """ xTRMV: Triangular matrix-vector multiplication """ - dtype = check_dtype([a, x], ["float32", "float64", "complex64", "complex128"]) + dtype = check_dtype([a, x], ["float32", "float64", "complex64", "complex128", "float16"]) check_matrix(a, "a") check_vector(x, "x") @@ -1179,6 +1268,8 @@ def trmv(queue, n, a, x, a_ld, x_inc = 1, lower_triangle = False, a_transp = Fal err = CLBlastCtrmv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, &command_queue, &event) elif dtype == np.dtype("complex128"): err = CLBlastZtrmv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, &command_queue, &event) + elif dtype == np.dtype("float16"): + err = CLBlastHtrmv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, &command_queue, &event) else: raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) if err != CLBlastSuccess: @@ -1194,13 +1285,14 @@ cdef extern from "clblast_c.h": CLBlastStatusCode CLBlastDtbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const size_t k, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) CLBlastStatusCode CLBlastCtbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const size_t k, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) CLBlastStatusCode CLBlastZtbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const size_t k, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastHtbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const size_t k, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) def tbmv(queue, n, k, a, x, a_ld, x_inc = 1, lower_triangle = False, a_transp = False, unit_diagonal = False, a_offset = 0, x_offset = 0): """ xTBMV: Triangular banded matrix-vector multiplication """ - dtype = check_dtype([a, x], ["float32", "float64", "complex64", "complex128"]) + dtype = check_dtype([a, x], ["float32", "float64", "complex64", "complex128", "float16"]) check_matrix(a, "a") check_vector(x, "x") @@ -1222,6 +1314,8 @@ def tbmv(queue, n, k, a, x, a_ld, x_inc = 1, lower_triangle = False, a_transp = err = CLBlastCtbmv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n, k, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, &command_queue, &event) elif dtype == np.dtype("complex128"): err = CLBlastZtbmv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n, k, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, &command_queue, &event) + elif dtype == np.dtype("float16"): + err = CLBlastHtbmv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n, k, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, &command_queue, &event) else: raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) if err != CLBlastSuccess: @@ -1237,13 +1331,14 @@ cdef extern from "clblast_c.h": CLBlastStatusCode CLBlastDtpmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const cl_mem ap_buffer, const size_t ap_offset, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) CLBlastStatusCode CLBlastCtpmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const cl_mem ap_buffer, const size_t ap_offset, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) CLBlastStatusCode CLBlastZtpmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const cl_mem ap_buffer, const size_t ap_offset, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastHtpmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const cl_mem ap_buffer, const size_t ap_offset, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event) def tpmv(queue, n, ap, x, ap_ld, x_inc = 1, lower_triangle = False, a_transp = False, unit_diagonal = False, ap_offset = 0, x_offset = 0): """ xTPMV: Triangular packed matrix-vector multiplication """ - dtype = check_dtype([ap, x], ["float32", "float64", "complex64", "complex128"]) + dtype = check_dtype([ap, x], ["float32", "float64", "complex64", "complex128", "float16"]) check_matrix(ap, "ap") check_vector(x, "x") @@ -1265,6 +1360,8 @@ def tpmv(queue, n, ap, x, ap_ld, x_inc = 1, lower_triangle = False, a_transp = F err = CLBlastCtpmv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n, ap_buffer, ap_offset, x_buffer, x_offset, x_inc, &command_queue, &event) elif dtype == np.dtype("complex128"): err = CLBlastZtpmv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n, ap_buffer, ap_offset, x_buffer, x_offset, x_inc, &command_queue, &event) + elif dtype == np.dtype("float16"): + err = CLBlastHtpmv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n, ap_buffer, ap_offset, x_buffer, x_offset, x_inc, &command_queue, &event) else: raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) if err != CLBlastSuccess: @@ -1321,13 +1418,14 @@ def trsv(queue, n, a, x, a_ld, x_inc = 1, lower_triangle = False, a_transp = Fal cdef extern from "clblast_c.h": CLBlastStatusCode CLBlastSger(const CLBlastLayout layout, const size_t m, const size_t n, const float alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld,cl_command_queue* queue, cl_event* event) CLBlastStatusCode CLBlastDger(const CLBlastLayout layout, const size_t m, const size_t n, const double alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastHger(const CLBlastLayout layout, const size_t m, const size_t n, const cl_half alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld,cl_command_queue* queue, cl_event* event) def ger(queue, m, n, x, y, a, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, x_offset = 0, y_offset = 0, a_offset = 0): """ xGER: General rank-1 matrix update """ - dtype = check_dtype([x, y, a], ["float32", "float64"]) + dtype = check_dtype([x, y, a], ["float32", "float64", "float16"]) check_vector(x, "x") check_vector(y, "y") check_matrix(a, "a") @@ -1344,6 +1442,8 @@ def ger(queue, m, n, x, y, a, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, x_offset err = CLBlastSger(CLBlastLayoutRowMajor, m, n, <cl_float>alpha, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, a_buffer, a_offset, a_ld, &command_queue, &event) elif dtype == np.dtype("float64"): err = CLBlastDger(CLBlastLayoutRowMajor, m, n, <cl_double>alpha, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, a_buffer, a_offset, a_ld, &command_queue, &event) + elif dtype == np.dtype("float16"): + err = CLBlastHger(CLBlastLayoutRowMajor, m, n, <cl_half>alpha, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, a_buffer, a_offset, a_ld, &command_queue, &event) else: raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) if err != CLBlastSuccess: @@ -1573,13 +1673,14 @@ def hpr2(queue, n, x, y, ap, ap_ld, x_inc = 1, y_inc = 1, alpha = 1.0, lower_tri cdef extern from "clblast_c.h": CLBlastStatusCode CLBlastSsyr(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const float alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld,cl_command_queue* queue, cl_event* event) CLBlastStatusCode CLBlastDsyr(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const double alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastHsyr(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const cl_half alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld,cl_command_queue* queue, cl_event* event) def syr(queue, n, x, a, a_ld, x_inc = 1, alpha = 1.0, lower_triangle = False, x_offset = 0, a_offset = 0): """ xSYR: Symmetric rank-1 matrix update """ - dtype = check_dtype([x, a], ["float32", "float64"]) + dtype = check_dtype([x, a], ["float32", "float64", "float16"]) check_vector(x, "x") check_matrix(a, "a") @@ -1595,6 +1696,8 @@ def syr(queue, n, x, a, a_ld, x_inc = 1, alpha = 1.0, lower_triangle = False, x_ err = CLBlastSsyr(CLBlastLayoutRowMajor, triangle, n, <cl_float>alpha, x_buffer, x_offset, x_inc, a_buffer, a_offset, a_ld, &command_queue, &event) elif dtype == np.dtype("float64"): err = CLBlastDsyr(CLBlastLayoutRowMajor, triangle, n, <cl_double>alpha, x_buffer, x_offset, x_inc, a_buffer, a_offset, a_ld, &command_queue, &event) + elif dtype == np.dtype("float16"): + err = CLBlastHsyr(CLBlastLayoutRowMajor, triangle, n, <cl_half>alpha, x_buffer, x_offset, x_inc, a_buffer, a_offset, a_ld, &command_queue, &event) else: raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) if err != CLBlastSuccess: @@ -1608,13 +1711,14 @@ def syr(queue, n, x, a, a_ld, x_inc = 1, alpha = 1.0, lower_triangle = False, x_ cdef extern from "clblast_c.h": CLBlastStatusCode CLBlastSspr(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const float alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem ap_buffer, const size_t ap_offset,cl_command_queue* queue, cl_event* event) CLBlastStatusCode CLBlastDspr(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const double alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem ap_buffer, const size_t ap_offset,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastHspr(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const cl_half alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem ap_buffer, const size_t ap_offset,cl_command_queue* queue, cl_event* event) def spr(queue, n, x, ap, ap_ld, x_inc = 1, alpha = 1.0, lower_triangle = False, x_offset = 0, ap_offset = 0): """ xSPR: Symmetric packed rank-1 matrix update """ - dtype = check_dtype([x, ap], ["float32", "float64"]) + dtype = check_dtype([x, ap], ["float32", "float64", "float16"]) check_vector(x, "x") check_matrix(ap, "ap") @@ -1630,6 +1734,8 @@ def spr(queue, n, x, ap, ap_ld, x_inc = 1, alpha = 1.0, lower_triangle = False, err = CLBlastSspr(CLBlastLayoutRowMajor, triangle, n, <cl_float>alpha, x_buffer, x_offset, x_inc, ap_buffer, ap_offset, &command_queue, &event) elif dtype == np.dtype("float64"): err = CLBlastDspr(CLBlastLayoutRowMajor, triangle, n, <cl_double>alpha, x_buffer, x_offset, x_inc, ap_buffer, ap_offset, &command_queue, &event) + elif dtype == np.dtype("float16"): + err = CLBlastHspr(CLBlastLayoutRowMajor, triangle, n, <cl_half>alpha, x_buffer, x_offset, x_inc, ap_buffer, ap_offset, &command_queue, &event) else: raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) if err != CLBlastSuccess: @@ -1643,13 +1749,14 @@ def spr(queue, n, x, ap, ap_ld, x_inc = 1, alpha = 1.0, lower_triangle = False, cdef extern from "clblast_c.h": CLBlastStatusCode CLBlastSsyr2(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const float alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld,cl_command_queue* queue, cl_event* event) CLBlastStatusCode CLBlastDsyr2(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const double alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastHsyr2(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const cl_half alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld,cl_command_queue* queue, cl_event* event) def syr2(queue, n, x, y, a, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, lower_triangle = False, x_offset = 0, y_offset = 0, a_offset = 0): """ xSYR2: Symmetric rank-2 matrix update """ - dtype = check_dtype([x, y, a], ["float32", "float64"]) + dtype = check_dtype([x, y, a], ["float32", "float64", "float16"]) check_vector(x, "x") check_vector(y, "y") check_matrix(a, "a") @@ -1667,6 +1774,8 @@ def syr2(queue, n, x, y, a, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, lower_trian err = CLBlastSsyr2(CLBlastLayoutRowMajor, triangle, n, <cl_float>alpha, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, a_buffer, a_offset, a_ld, &command_queue, &event) elif dtype == np.dtype("float64"): err = CLBlastDsyr2(CLBlastLayoutRowMajor, triangle, n, <cl_double>alpha, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, a_buffer, a_offset, a_ld, &command_queue, &event) + elif dtype == np.dtype("float16"): + err = CLBlastHsyr2(CLBlastLayoutRowMajor, triangle, n, <cl_half>alpha, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, a_buffer, a_offset, a_ld, &command_queue, &event) else: raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) if err != CLBlastSuccess: @@ -1680,13 +1789,14 @@ def syr2(queue, n, x, y, a, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, lower_trian cdef extern from "clblast_c.h": CLBlastStatusCode CLBlastSspr2(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const float alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem ap_buffer, const size_t ap_offset,cl_command_queue* queue, cl_event* event) CLBlastStatusCode CLBlastDspr2(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const double alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem ap_buffer, const size_t ap_offset,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastHspr2(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const cl_half alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem ap_buffer, const size_t ap_offset,cl_command_queue* queue, cl_event* event) def spr2(queue, n, x, y, ap, ap_ld, x_inc = 1, y_inc = 1, alpha = 1.0, lower_triangle = False, x_offset = 0, y_offset = 0, ap_offset = 0): """ xSPR2: Symmetric packed rank-2 matrix update """ - dtype = check_dtype([x, y, ap], ["float32", "float64"]) + dtype = check_dtype([x, y, ap], ["float32", "float64", "float16"]) check_vector(x, "x") check_vector(y, "y") check_matrix(ap, "ap") @@ -1704,6 +1814,8 @@ def spr2(queue, n, x, y, ap, ap_ld, x_inc = 1, y_inc = 1, alpha = 1.0, lower_tri err = CLBlastSspr2(CLBlastLayoutRowMajor, triangle, n, <cl_float>alpha, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, ap_buffer, ap_offset, &command_queue, &event) elif dtype == np.dtype("float64"): err = CLBlastDspr2(CLBlastLayoutRowMajor, triangle, n, <cl_double>alpha, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, ap_buffer, ap_offset, &command_queue, &event) + elif dtype == np.dtype("float16"): + err = CLBlastHspr2(CLBlastLayoutRowMajor, triangle, n, <cl_half>alpha, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, ap_buffer, ap_offset, &command_queue, &event) else: raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) if err != CLBlastSuccess: @@ -1719,13 +1831,14 @@ cdef extern from "clblast_c.h": CLBlastStatusCode CLBlastDgemm(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, const size_t m, const size_t n, const size_t k, const double alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const double beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event) CLBlastStatusCode CLBlastCgemm(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, const size_t m, const size_t n, const size_t k, const cl_float2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const cl_float2 beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event) CLBlastStatusCode CLBlastZgemm(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, const size_t m, const size_t n, const size_t k, const cl_double2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const cl_double2 beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastHgemm(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, const size_t m, const size_t n, const size_t k, const cl_half alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const cl_half beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event) def gemm(queue, m, n, k, a, b, c, a_ld, b_ld, c_ld, alpha = 1.0, beta = 0.0, a_transp = False, b_transp = False, a_offset = 0, b_offset = 0, c_offset = 0): """ xGEMM: General matrix-matrix multiplication """ - dtype = check_dtype([a, b, c], ["float32", "float64", "complex64", "complex128"]) + dtype = check_dtype([a, b, c], ["float32", "float64", "complex64", "complex128", "float16"]) check_matrix(a, "a") check_matrix(b, "b") check_matrix(c, "c") @@ -1748,6 +1861,8 @@ def gemm(queue, m, n, k, a, b, c, a_ld, b_ld, c_ld, alpha = 1.0, beta = 0.0, a_t err = CLBlastCgemm(CLBlastLayoutRowMajor, a_transpose, b_transpose, m, n, k, <cl_float2>cl_float2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, <cl_float2>cl_float2(x=beta.real,y=beta.imag), c_buffer, c_offset, c_ld, &command_queue, &event) elif dtype == np.dtype("complex128"): err = CLBlastZgemm(CLBlastLayoutRowMajor, a_transpose, b_transpose, m, n, k, <cl_double2>cl_double2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, <cl_double2>cl_double2(x=beta.real,y=beta.imag), c_buffer, c_offset, c_ld, &command_queue, &event) + elif dtype == np.dtype("float16"): + err = CLBlastHgemm(CLBlastLayoutRowMajor, a_transpose, b_transpose, m, n, k, <cl_half>alpha, a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, <cl_half>beta, c_buffer, c_offset, c_ld, &command_queue, &event) else: raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) if err != CLBlastSuccess: @@ -1763,13 +1878,14 @@ cdef extern from "clblast_c.h": CLBlastStatusCode CLBlastDsymm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const size_t m, const size_t n, const double alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const double beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event) CLBlastStatusCode CLBlastCsymm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const size_t m, const size_t n, const cl_float2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const cl_float2 beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event) CLBlastStatusCode CLBlastZsymm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const size_t m, const size_t n, const cl_double2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const cl_double2 beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastHsymm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const size_t m, const size_t n, const cl_half alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const cl_half beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event) def symm(queue, m, n, a, b, c, a_ld, b_ld, c_ld, alpha = 1.0, beta = 0.0, right_side = False, lower_triangle = False, a_offset = 0, b_offset = 0, c_offset = 0): """ xSYMM: Symmetric matrix-matrix multiplication """ - dtype = check_dtype([a, b, c], ["float32", "float64", "complex64", "complex128"]) + dtype = check_dtype([a, b, c], ["float32", "float64", "complex64", "complex128", "float16"]) check_matrix(a, "a") check_matrix(b, "b") check_matrix(c, "c") @@ -1792,6 +1908,8 @@ def symm(queue, m, n, a, b, c, a_ld, b_ld, c_ld, alpha = 1.0, beta = 0.0, right_ err = CLBlastCsymm(CLBlastLayoutRowMajor, side, triangle, m, n, <cl_float2>cl_float2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, <cl_float2>cl_float2(x=beta.real,y=beta.imag), c_buffer, c_offset, c_ld, &command_queue, &event) elif dtype == np.dtype("complex128"): err = CLBlastZsymm(CLBlastLayoutRowMajor, side, triangle, m, n, <cl_double2>cl_double2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, <cl_double2>cl_double2(x=beta.real,y=beta.imag), c_buffer, c_offset, c_ld, &command_queue, &event) + elif dtype == np.dtype("float16"): + err = CLBlastHsymm(CLBlastLayoutRowMajor, side, triangle, m, n, <cl_half>alpha, a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, <cl_half>beta, c_buffer, c_offset, c_ld, &command_queue, &event) else: raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) if err != CLBlastSuccess: @@ -1845,13 +1963,14 @@ cdef extern from "clblast_c.h": CLBlastStatusCode CLBlastDsyrk(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const size_t n, const size_t k, const double alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const double beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event) CLBlastStatusCode CLBlastCsyrk(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const size_t n, const size_t k, const cl_float2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_float2 beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event) CLBlastStatusCode CLBlastZsyrk(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const size_t n, const size_t k, const cl_double2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_double2 beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastHsyrk(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const size_t n, const size_t k, const cl_half alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_half beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event) def syrk(queue, n, k, a, c, a_ld, c_ld, alpha = 1.0, beta = 0.0, lower_triangle = False, a_transp = False, a_offset = 0, c_offset = 0): """ xSYRK: Rank-K update of a symmetric matrix """ - dtype = check_dtype([a, c], ["float32", "float64", "complex64", "complex128"]) + dtype = check_dtype([a, c], ["float32", "float64", "complex64", "complex128", "float16"]) check_matrix(a, "a") check_matrix(c, "c") @@ -1872,6 +1991,8 @@ def syrk(queue, n, k, a, c, a_ld, c_ld, alpha = 1.0, beta = 0.0, lower_triangle err = CLBlastCsyrk(CLBlastLayoutRowMajor, triangle, a_transpose, n, k, <cl_float2>cl_float2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, <cl_float2>cl_float2(x=beta.real,y=beta.imag), c_buffer, c_offset, c_ld, &command_queue, &event) elif dtype == np.dtype("complex128"): err = CLBlastZsyrk(CLBlastLayoutRowMajor, triangle, a_transpose, n, k, <cl_double2>cl_double2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, <cl_double2>cl_double2(x=beta.real,y=beta.imag), c_buffer, c_offset, c_ld, &command_queue, &event) + elif dtype == np.dtype("float16"): + err = CLBlastHsyrk(CLBlastLayoutRowMajor, triangle, a_transpose, n, k, <cl_half>alpha, a_buffer, a_offset, a_ld, <cl_half>beta, c_buffer, c_offset, c_ld, &command_queue, &event) else: raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) if err != CLBlastSuccess: @@ -1923,13 +2044,14 @@ cdef extern from "clblast_c.h": CLBlastStatusCode CLBlastDsyr2k(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose ab_transpose, const size_t n, const size_t k, const double alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const double beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event) CLBlastStatusCode CLBlastCsyr2k(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose ab_transpose, const size_t n, const size_t k, const cl_float2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const cl_float2 beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event) CLBlastStatusCode CLBlastZsyr2k(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose ab_transpose, const size_t n, const size_t k, const cl_double2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const cl_double2 beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastHsyr2k(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose ab_transpose, const size_t n, const size_t k, const cl_half alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const cl_half beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event) def syr2k(queue, n, k, a, b, c, a_ld, b_ld, c_ld, alpha = 1.0, beta = 0.0, lower_triangle = False, ab_transp = False, a_offset = 0, b_offset = 0, c_offset = 0): """ xSYR2K: Rank-2K update of a symmetric matrix """ - dtype = check_dtype([a, b, c], ["float32", "float64", "complex64", "complex128"]) + dtype = check_dtype([a, b, c], ["float32", "float64", "complex64", "complex128", "float16"]) check_matrix(a, "a") check_matrix(b, "b") check_matrix(c, "c") @@ -1952,6 +2074,8 @@ def syr2k(queue, n, k, a, b, c, a_ld, b_ld, c_ld, alpha = 1.0, beta = 0.0, lower err = CLBlastCsyr2k(CLBlastLayoutRowMajor, triangle, ab_transpose, n, k, <cl_float2>cl_float2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, <cl_float2>cl_float2(x=beta.real,y=beta.imag), c_buffer, c_offset, c_ld, &command_queue, &event) elif dtype == np.dtype("complex128"): err = CLBlastZsyr2k(CLBlastLayoutRowMajor, triangle, ab_transpose, n, k, <cl_double2>cl_double2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, <cl_double2>cl_double2(x=beta.real,y=beta.imag), c_buffer, c_offset, c_ld, &command_queue, &event) + elif dtype == np.dtype("float16"): + err = CLBlastHsyr2k(CLBlastLayoutRowMajor, triangle, ab_transpose, n, k, <cl_half>alpha, a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, <cl_half>beta, c_buffer, c_offset, c_ld, &command_queue, &event) else: raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) if err != CLBlastSuccess: @@ -2005,13 +2129,14 @@ cdef extern from "clblast_c.h": CLBlastStatusCode CLBlastDtrmm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t m, const size_t n, const double alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem b_buffer, const size_t b_offset, const size_t b_ld,cl_command_queue* queue, cl_event* event) CLBlastStatusCode CLBlastCtrmm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t m, const size_t n, const cl_float2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem b_buffer, const size_t b_offset, const size_t b_ld,cl_command_queue* queue, cl_event* event) CLBlastStatusCode CLBlastZtrmm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t m, const size_t n, const cl_double2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem b_buffer, const size_t b_offset, const size_t b_ld,cl_command_queue* queue, cl_event* event) + CLBlastStatusCode CLBlastHtrmm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t m, const size_t n, const cl_half alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem b_buffer, const size_t b_offset, const size_t b_ld,cl_command_queue* queue, cl_event* event) def trmm(queue, m, n, a, b, a_ld, b_ld, alpha = 1.0, right_side = False, lower_triangle = False, a_transp = False, unit_diagonal = False, a_offset = 0, b_offset = 0): """ xTRMM: Triangular matrix-matrix multiplication """ - dtype = check_dtype([a, b], ["float32", "float64", "complex64", "complex128"]) + dtype = check_dtype([a, b], ["float32", "float64", "complex64", "complex128", "float16"]) check_matrix(a, "a") check_matrix(b, "b") @@ -2034,6 +2159,8 @@ def trmm(queue, m, n, a, b, a_ld, b_ld, alpha = 1.0, right_side = False, lower_t err = CLBlastCtrmm(CLBlastLayoutRowMajor, side, triangle, a_transpose, diagonal, m, n, <cl_float2>cl_float2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, &command_queue, &event) elif dtype == np.dtype("complex128"): err = CLBlastZtrmm(CLBlastLayoutRowMajor, side, triangle, a_transpose, diagonal, m, n, <cl_double2>cl_double2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, &command_queue, &event) + elif dtype == np.dtype("float16"): + err = CLBlastHtrmm(CLBlastLayoutRowMajor, side, triangle, a_transpose, diagonal, m, n, <cl_half>alpha, a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, &command_queue, &event) else: raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype) if err != CLBlastSuccess: @@ -2098,7 +2225,7 @@ def override_parameters(device, kernel_name, precision, parameters): """ Override the current parameters for the given kernel, on this device, with this precision. """ - + cdef cl_device_id device_id = <cl_device_id><size_t>device.int_ptr # read the parameters dictionary into names/values arrays, for use in CLBlastOverrideParameters @@ -2110,7 +2237,7 @@ def override_parameters(device, kernel_name, precision, parameters): for i, (k, v) in enumerate(parameters.items()): parameter_names[i] = strdup(k.encode('ascii')) parameter_values[i] = v - + # call the underlying API err = CLBlastOverrideParameters(device_id, kernel_name.encode('ascii'), precision, n, parameter_names, parameter_values) if err != CLBlastSuccess: |