summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCedric Nugteren <web@cedricnugteren.nl>2018-02-18 20:44:00 +0100
committerGitHub <noreply@github.com>2018-02-18 20:44:00 +0100
commitf8c8d167bb02e024141f1a38d4867d3fedc9267e (patch)
tree1e7b39c9cf7279e9257a28b9a7a021a39a6191bc
parentc5a28cd70b693431a4bee7c4d8e2062bc2edcd18 (diff)
parentfc10a4baca150811361a0147cd748008acc3cfca (diff)
Merge pull request #247 from CNugteren/CLBlast-223-python-interface
PyCLBlast: a Python interface for CLBlast based on PyOpenCL
-rw-r--r--.gitignore4
-rw-r--r--CHANGELOG1
-rw-r--r--ROADMAP.md2
-rwxr-xr-xscripts/generator/generator.py12
-rw-r--r--scripts/generator/generator/pyclblast.py113
-rw-r--r--scripts/generator/generator/routine.py32
-rw-r--r--src/pyclblast/MANIFEST.in2
-rw-r--r--src/pyclblast/README.md31
-rw-r--r--src/pyclblast/samples/saxpy.py36
-rw-r--r--src/pyclblast/samples/sgemm.py38
-rw-r--r--src/pyclblast/samples/sgemv.py40
-rw-r--r--src/pyclblast/setup.py45
-rw-r--r--src/pyclblast/src/pyclblast.pyx1901
13 files changed, 2252 insertions, 5 deletions
diff --git a/.gitignore b/.gitignore
index 8ccab476..b0f39c40 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,4 +4,6 @@ stash
*.pyc
database.json
database_best.json
-cl.hpp \ No newline at end of file
+cl.hpp
+src/pyclblast/dist
+*.egg-info
diff --git a/CHANGELOG b/CHANGELOG
index 13b20d9d..89af84f2 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,5 +1,6 @@
Development (next version)
+- Added Python interface to CLBlast 'PyCLBlast'
- Added non-BLAS level-1 routines:
* SHAD/DHAD/CHAD/ZHAD/HHAD (Hadamard element-wise vector-vector product)
diff --git a/ROADMAP.md b/ROADMAP.md
index df42f75c..df7f6488 100644
--- a/ROADMAP.md
+++ b/ROADMAP.md
@@ -15,5 +15,5 @@ This file gives an overview of the main features planned for addition to CLBlast
| [#95](https://github.com/CNugteren/CLBlast/issues/95) & #237 | Jan '18 | CNugteren | ✔ | Implement strided batch GEMM |
| [#224](https://github.com/CNugteren/CLBlast/issues/224) | Jan-Feb '18 | CNugteren | ✔ | Implement Hadamard product (element-wise vector-vector product) |
| [#233](https://github.com/CNugteren/CLBlast/issues/233) | Feb '18 | CNugteren | | Add CLBlast to common package managers |
-| [#223](https://github.com/CNugteren/CLBlast/issues/223) | Feb '18 | CNugteren | | Python OpenCL interface |
+| [#223](https://github.com/CNugteren/CLBlast/issues/223) | Feb '18 | CNugteren | ✔ | Python OpenCL interface |
| [#169](https://github.com/CNugteren/CLBlast/issues/169) | ?? | dividiti | | Problem-specific tuning parameter selection |
diff --git a/scripts/generator/generator.py b/scripts/generator/generator.py
index 955625f5..8c071ab3 100755
--- a/scripts/generator/generator.py
+++ b/scripts/generator/generator.py
@@ -18,6 +18,7 @@
# clblast_netlib_c.cpp
# wrapper_clblas.h
# wrapper_cblas.h
+# pyclblast.pyx
# It also generates the main functions for the correctness and performance tests as found in
# test/correctness/routines/levelX/xYYYY.cpp
# test/performance/routines/levelX/xYYYY.cpp
@@ -30,6 +31,7 @@ import argparse
import generator.cpp as cpp
import generator.doc as doc
+import generator.pyclblast as pyclblast
from generator.routine import Routine
from generator.datatype import H, S, D, C, Z, Sc, Dz, iH, iS, iD, iC, iZ, Css, Zdd, Ccs, Zzd, T, Tc, TU
@@ -45,9 +47,10 @@ FILES = [
"/src/clblast_netlib_c.cpp",
"/include/clblast_cuda.h",
"/src/clblast_cuda.cpp",
+ "/src/pyclblast/src/pyclblast.pyx"
]
-HEADER_LINES = [123, 21, 126, 24, 29, 41, 29, 65, 32, 95, 21]
-FOOTER_LINES = [41, 56, 27, 38, 6, 6, 6, 9, 2, 41, 55]
+HEADER_LINES = [123, 21, 126, 24, 29, 41, 29, 65, 32, 95, 21, 288]
+FOOTER_LINES = [41, 56, 27, 38, 6, 6, 6, 9, 2, 41, 55, 1]
HEADER_LINES_DOC = 0
FOOTER_LINES_DOC = 63
@@ -209,7 +212,8 @@ def main(argv):
body = ""
levels = [1, 2, 3] if (i == 4 or i == 5 or i == 6) else [1, 2, 3, 4]
for level in levels:
- body += cpp.LEVEL_SEPARATORS[level - 1] + "\n"
+ if i not in [11]:
+ body += cpp.LEVEL_SEPARATORS[level - 1] + "\n"
for routine in ROUTINES[level - 1]:
if i == 0:
body += cpp.clblast_h(routine)
@@ -235,6 +239,8 @@ def main(argv):
body += cpp.clblast_h(routine, cuda=True)
if i == 10:
body += cpp.clblast_cc(routine, cuda=True)
+ if i == 11:
+ body += pyclblast.generate_pyx(routine)
f.write("".join(file_header))
f.write(body)
f.write("".join(file_footer))
diff --git a/scripts/generator/generator/pyclblast.py b/scripts/generator/generator/pyclblast.py
new file mode 100644
index 00000000..85bcc93f
--- /dev/null
+++ b/scripts/generator/generator/pyclblast.py
@@ -0,0 +1,113 @@
+
+# This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This file follows the
+# PEP8 Python style guide and uses a max-width of 120 characters per line.
+#
+# Author(s):
+# Cedric Nugteren <www.cedricnugteren.nl>
+
+NL = "\n"
+SEPARATOR = "####################################################################################################"
+
+
+def to_np_dtype(flavour):
+ if flavour.precision_name == "S":
+ return "float32"
+ if flavour.precision_name == "D":
+ return "float64"
+ if flavour.precision_name == "C":
+ return "complex64"
+ if flavour.precision_name == "Z":
+ return "complex128"
+ raise RuntimeError("Could not convert flavour '%s' to numpy" % flavour.precision_name)
+
+
+def scalar_cython_conversion(scalar, flavour):
+ scalar_type = flavour.alpha_cl if scalar == "alpha" else flavour.beta_cl
+ if scalar_type == "float":
+ return "<cl_float>" + scalar
+ if scalar_type == "double":
+ return "<cl_double>" + scalar
+ if scalar_type in ["cl_float2", "float2"]:
+ return "<cl_float2>cl_float2(x=" + scalar + ".real,y=" + scalar + ".imag)"
+ if scalar_type in ["cl_double2", "double2"]:
+ return "<cl_double2>cl_double2(x=" + scalar + ".real,y=" + scalar + ".imag)"
+ raise RuntimeError("Could not convert flavour '%s:%s'" % (flavour.precision_name, scalar_type))
+
+
+def generate_pyx(routine):
+ result = ""
+ if routine.implemented and routine.plain_name() and routine.level in ["1", "2a", "2b", "3"]:
+
+ result += SEPARATOR + NL
+ result += "# " + routine.description + ": " + routine.short_names() + NL
+ result += SEPARATOR + NL
+ result += NL
+
+ result += "cdef extern from \"clblast_c.h\":" + NL
+ np_dtypes = []
+ for flavour in routine.flavours:
+ if flavour.precision_name in ["S", "D", "C", "Z"]:
+ result += " CLBlastStatusCode CLBlast" + flavour.name + routine.plain_name() + "("
+ result += ", ".join(routine.arguments_def_c(flavour)) + ","
+ result += "cl_command_queue* queue, cl_event* event)" + NL
+ np_dtypes.append(to_np_dtype(flavour))
+ result += "" + NL
+
+ buffers = routine.inputs[:] + routine.outputs[:]
+ result += "def " + routine.plain_name() + "(queue, "
+ result += ", ".join(routine.arguments_python()) + "):" + NL
+ result += " dtype = check_dtype([" + ", ".join(buffers) + "], "
+ result += "[" + ", ".join(['"%s"' % d for d in np_dtypes]) + "])" + NL
+ for buf in buffers:
+ if buf in routine.buffers_vector():
+ result += " check_vector("
+ else:
+ result += " check_matrix("
+ result += buf + ", \"" + buf + "\")" + NL
+ result += "" + NL
+
+ for buf in buffers:
+ result += " cdef cl_mem " + buf + "_buffer = <cl_mem><size_t>" + buf + ".base_data.int_ptr" + NL
+ result += "" + NL
+
+ result += " cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr" + NL
+ result += " cdef cl_event event = NULL" + NL
+
+ for option in routine.options:
+ if option == "a_transpose":
+ result += " a_transpose = CLBlastTransposeYes if a_transp else CLBlastTransposeNo" + NL
+ if option == "b_transpose":
+ result += " b_transpose = CLBlastTransposeYes if b_transp else CLBlastTransposeNo" + NL
+ if option == "ab_transpose":
+ result += " ab_transpose = CLBlastTransposeYes if ab_transp else CLBlastTransposeNo" + NL
+ if option == "side":
+ result += " side = CLBlastSideRight if right_side else CLBlastSideLeft" + NL
+ if option == "triangle":
+ result += " triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper" + NL
+ if option == "diagonal":
+ result += " diagonal = CLBlastDiagonalUnit if unit_diagonal else CLBlastDiagonalNonUnit" + NL
+
+ result += "" + NL
+ result += " cdef CLBlastStatusCode err" + NL
+ if_prefix = ""
+ for flavour in routine.flavours:
+ if flavour.precision_name in ["S", "D", "C", "Z"]:
+ np_dtype = to_np_dtype(flavour)
+ argument_names = [x.
+ replace("layout", "CLBlastLayoutRowMajor").
+ replace("alpha", scalar_cython_conversion("alpha", flavour)).
+ replace("beta", scalar_cython_conversion("beta", flavour))
+ for x in routine.arguments()]
+ result += " " + if_prefix + "if dtype == np.dtype(\"" + np_dtype + "\"):" + NL
+ result += " err = CLBlast" + flavour.name + routine.plain_name()
+ result += "(" + ", ".join(argument_names) + ", &command_queue, &event)" + NL
+ if_prefix = "el"
+
+ result += " else:" + NL
+ result += " raise ValueError(\"PyCLBlast: Unrecognized data-type '%s'\" % dtype)" + NL
+ result += " if err != CLBlastSuccess:" + NL
+ result += " raise RuntimeError(\"PyCLBlast: 'CLBlastX" + routine.plain_name() + "' failed: %s\" % get_status_message(err))" + NL
+ result += " return cl.Event.from_int_ptr(<size_t>event)" + NL
+ result += NL
+
+ return result
diff --git a/scripts/generator/generator/routine.py b/scripts/generator/generator/routine.py
index 052709ee..c52f49ca 100644
--- a/scripts/generator/generator/routine.py
+++ b/scripts/generator/generator/routine.py
@@ -815,6 +815,38 @@ class Routine:
list(chain(*[self.scalar_doc(s) for s in self.other_scalars()])) +
self.batch_count_doc())
+ def arguments_python(self):
+ """Arguments for the Python wrapper pyclblast"""
+ result = list()
+ result.extend(self.sizes)
+ buffers = self.inputs + self.outputs
+ result.extend(buffers[:])
+ for buf in buffers:
+ if buf in self.buffers_matrix():
+ result.append(buf + "_ld")
+ for buf in buffers:
+ if buf in self.buffers_vector():
+ result.append(buf + "_inc = 1")
+ for scalar in self.scalars:
+ default = "1.0" if scalar == "alpha" else "0.0"
+ result.append(scalar + " = " + default)
+ for option in self.options:
+ if option == "a_transpose":
+ result.append("a_transp = False")
+ if option == "b_transpose":
+ result.append("b_transp = False")
+ if option == "ab_transpose":
+ result.append("ab_transp = False")
+ if option == "side":
+ result.append("right_side = False")
+ if option == "triangle":
+ result.append("lower_triangle = False")
+ if option == "diagonal":
+ result.append("unit_diagonal = False")
+ for buf in buffers:
+ result.append(buf + "_offset = 0")
+ return result
+
def requirements_doc(self):
"""Retrieves a list of routine requirements for documentation"""
return self.requirements
diff --git a/src/pyclblast/MANIFEST.in b/src/pyclblast/MANIFEST.in
new file mode 100644
index 00000000..fb20923f
--- /dev/null
+++ b/src/pyclblast/MANIFEST.in
@@ -0,0 +1,2 @@
+include README.md setup.py src/*.pyx
+include samples/*.py
diff --git a/src/pyclblast/README.md b/src/pyclblast/README.md
new file mode 100644
index 00000000..be37af01
--- /dev/null
+++ b/src/pyclblast/README.md
@@ -0,0 +1,31 @@
+
+PyCLBlast: Python wrappers for the tuned OpenCL BLAS library CLBlast
+================
+
+This Python package provides a straightforward wrapper for CLBast based on PyOpenCL. CLBlast is a modern, lightweight, performant and tunable OpenCL BLAS library written in C++11. It is designed to leverage the full performance potential of a wide variety of OpenCL devices from different vendors, including desktop and laptop GPUs, embedded GPUs, and other accelerators. CLBlast implements BLAS routines: basic linear algebra subprograms operating on vectors and matrices.
+
+See [the CLBlast repository](https://github.com/CNugteren/CLBlast) and [the CLBlast website](https://cnugteren.github.io/clblast) for more information about CLBlast.
+
+
+Prerequisites
+-------------
+
+Non-Python requirements:
+
+* OpenCL
+* [CLBlast](https://github.com/CNugteren/CLBlast)
+
+Python requirements:
+
+* Cython
+* [PyOpenCL](https://github.com/pyopencl/pyopencl/)
+
+
+Getting started
+-------------
+
+After installation OpenCL and CLBlast, simply use pip to install PyCLBlast, e.g.:
+
+ pip install --user pyclblast
+
+To start using the library, browse the [CLBlast](https://github.com/CNugteren/CLBlast) documentation or check out the PyCLBlast samples provides in the `samples` subfolder.
diff --git a/src/pyclblast/samples/saxpy.py b/src/pyclblast/samples/saxpy.py
new file mode 100644
index 00000000..098e44d5
--- /dev/null
+++ b/src/pyclblast/samples/saxpy.py
@@ -0,0 +1,36 @@
+#!/usr/bin/env python
+
+# This file is part of the CLBlast project. The project is licensed under Apache Version 2.0.
+# This file follows the PEP8 Python style guide and uses a max-width of 100 characters per line.
+#
+# Author(s):
+# Cedric Nugteren <www.cedricnugteren.nl>
+
+import numpy as np
+import pyopencl as cl
+from pyopencl.array import Array
+import pyclblast
+
+# Settings for this sample
+dtype = 'float32'
+alpha = 1.5
+n = 4
+
+print("# Setting up OpenCL")
+ctx = cl.create_some_context()
+queue = cl.CommandQueue(ctx)
+
+print("# Setting up Numpy arrays")
+x = np.random.rand(n).astype(dtype=dtype)
+y = np.random.rand(n).astype(dtype=dtype)
+
+print("# Setting up OpenCL arrays")
+clx = Array(queue, x.shape, x.dtype)
+cly = Array(queue, y.shape, y.dtype)
+clx.set(x)
+cly.set(y)
+
+print("# Example level-1 operation: AXPY")
+pyclblast.axpy(queue, n, clx, cly, alpha=alpha)
+print("# Result for vector y: %s" % cly.get())
+print("# Expected result: %s" % (alpha * x + y))
diff --git a/src/pyclblast/samples/sgemm.py b/src/pyclblast/samples/sgemm.py
new file mode 100644
index 00000000..c872553f
--- /dev/null
+++ b/src/pyclblast/samples/sgemm.py
@@ -0,0 +1,38 @@
+#!/usr/bin/env python
+
+# This file is part of the CLBlast project. The project is licensed under Apache Version 2.0.
+# This file follows the PEP8 Python style guide and uses a max-width of 100 characters per line.
+#
+# Author(s):
+# Cedric Nugteren <www.cedricnugteren.nl>
+
+import numpy as np
+import pyopencl as cl
+from pyopencl.array import Array
+import pyclblast
+
+# Settings for this sample
+dtype = 'float32'
+
+print("# Setting up OpenCL")
+ctx = cl.create_some_context()
+queue = cl.CommandQueue(ctx)
+
+print("# Setting up Numpy arrays")
+m, n, k = 2, 3, 4
+a = np.random.rand(m, k).astype(dtype=dtype)
+b = np.random.rand(k, n).astype(dtype=dtype)
+c = np.random.rand(m, n).astype(dtype=dtype)
+
+print("# Setting up OpenCL arrays")
+cla = Array(queue, a.shape, a.dtype)
+clb = Array(queue, b.shape, b.dtype)
+clc = Array(queue, c.shape, c.dtype)
+cla.set(a)
+clb.set(b)
+clc.set(c)
+
+print("# Example level-3 operation: GEMM")
+pyclblast.gemm(queue, m, n, k, cla, clb, clc, a_ld=k, b_ld=n, c_ld=n)
+print("# Matrix C result: %s" % clc.get())
+print("# Expected result: %s" % (np.dot(a, b)))
diff --git a/src/pyclblast/samples/sgemv.py b/src/pyclblast/samples/sgemv.py
new file mode 100644
index 00000000..196c838d
--- /dev/null
+++ b/src/pyclblast/samples/sgemv.py
@@ -0,0 +1,40 @@
+#!/usr/bin/env python
+
+# This file is part of the CLBlast project. The project is licensed under Apache Version 2.0.
+# This file follows the PEP8 Python style guide and uses a max-width of 100 characters per line.
+#
+# Author(s):
+# Cedric Nugteren <www.cedricnugteren.nl>
+
+import numpy as np
+import pyopencl as cl
+from pyopencl.array import Array
+import pyclblast
+
+# Settings for this sample
+dtype = 'float32'
+m, n = 4, 3
+alpha = 1.0
+beta = 0.0
+
+print("# Setting up OpenCL")
+ctx = cl.create_some_context()
+queue = cl.CommandQueue(ctx)
+
+print("# Setting up Numpy arrays")
+a = np.random.rand(m, n).astype(dtype=dtype)
+x = np.random.rand(n).astype(dtype=dtype)
+y = np.random.rand(m).astype(dtype=dtype)
+
+print("# Setting up OpenCL arrays")
+cla = Array(queue, a.shape, a.dtype)
+clx = Array(queue, x.shape, x.dtype)
+cly = Array(queue, y.shape, y.dtype)
+cla.set(a)
+clx.set(x)
+cly.set(y)
+
+print("# Example level-2 operation: GEMV")
+pyclblast.gemv(queue, m, n, cla, clx, cly, a_ld=n, alpha=alpha, beta=beta)
+print("# Result for vector y: %s" % cly.get())
+print("# Expected result: %s" % (alpha * np.dot(a, x) + beta * y))
diff --git a/src/pyclblast/setup.py b/src/pyclblast/setup.py
new file mode 100644
index 00000000..b6b95d1a
--- /dev/null
+++ b/src/pyclblast/setup.py
@@ -0,0 +1,45 @@
+
+# This file is part of the CLBlast project. The project is licensed under Apache Version 2.0.
+# This file follows the PEP8 Python style guide and uses a max-width of 100 characters per line.
+#
+# Author(s):
+# Cedric Nugteren <www.cedricnugteren.nl>
+
+from setuptools import setup
+
+from distutils.extension import Extension
+from Cython.Distutils import build_ext
+
+ext_modules = list()
+ext_modules.append(
+ Extension(
+ "pyclblast",
+ ["src/pyclblast.pyx"],
+ libraries=["clblast"],
+ language="c++"
+ )
+)
+
+setup(
+ name="pyclblast",
+ version="1.0.0",
+ author="Cedric Nugteren",
+ author_email="web@cedricnugteren.nl",
+ url="https://github.com/CNugteren/CLBlast/blob/master/src/pyclblast",
+ description="Python bindings for CLBlast, the tuned OpenCL BLAS library",
+ license="Apache Software License",
+ requires=["numpy", "pyopencl", "cython"],
+ package_dir={'': 'src'},
+ scripts=[],
+ ext_modules=ext_modules,
+ cmdclass={"build_ext": build_ext},
+ classifiers=[
+ 'Development Status :: 4 - Beta',
+ 'Intended Audience :: Developers',
+ 'Topic :: Software Development :: Libraries',
+ 'License :: OSI Approved :: Apache Software License',
+ 'Programming Language :: Python :: 2',
+ 'Programming Language :: Python :: 3',
+ ],
+ keywords="OpenCL BLAS CLBlast GEMM matrix-multiplication"
+)
diff --git a/src/pyclblast/src/pyclblast.pyx b/src/pyclblast/src/pyclblast.pyx
new file mode 100644
index 00000000..9529400c
--- /dev/null
+++ b/src/pyclblast/src/pyclblast.pyx
@@ -0,0 +1,1901 @@
+
+####################################################################################################
+# This file is part of the CLBlast project. The project is licensed under Apache Version 2.0.
+#
+# Author(s):
+# Cedric Nugteren <www.cedricnugteren.nl>
+#
+# This file defines the Python interface to CLBlast. It is inspired by:
+# https://github.com/hunse/pyopencl_blas
+#
+####################################################################################################
+
+import numpy as np
+import pyopencl as cl
+from pyopencl.array import Array
+
+from libcpp cimport bool
+
+####################################################################################################
+# CLBlast and OpenCL data-types
+####################################################################################################
+
+cdef extern from "clblast_c.h":
+
+ # Status codes
+ ctypedef enum CLBlastStatusCode:
+ CLBlastSuccess
+ CLBlastOpenCLCompilerNotAvailable
+ CLBlastTempBufferAllocFailure
+ CLBlastOpenCLOutOfResources
+ CLBlastOpenCLOutOfHostMemory
+ CLBlastOpenCLBuildProgramFailure
+ CLBlastInvalidValue
+ CLBlastInvalidCommandQueue
+ CLBlastInvalidMemObject
+ CLBlastInvalidBinary
+ CLBlastInvalidBuildOptions
+ CLBlastInvalidProgram
+ CLBlastInvalidProgramExecutable
+ CLBlastInvalidKernelName
+ CLBlastInvalidKernelDefinition
+ CLBlastInvalidKernel
+ CLBlastInvalidArgIndex
+ CLBlastInvalidArgValue
+ CLBlastInvalidArgSize
+ CLBlastInvalidKernelArgs
+ CLBlastInvalidLocalNumDimensions
+ CLBlastInvalidLocalThreadsTotal
+ CLBlastInvalidLocalThreadsDim
+ CLBlastInvalidGlobalOffset
+ CLBlastInvalidEventWaitList
+ CLBlastInvalidEvent
+ CLBlastInvalidOperation
+ CLBlastInvalidBufferSize
+ CLBlastInvalidGlobalWorkSize
+ CLBlastNotImplemented
+ CLBlastInvalidMatrixA
+ CLBlastInvalidMatrixB
+ CLBlastInvalidMatrixC
+ CLBlastInvalidVectorX
+ CLBlastInvalidVectorY
+ CLBlastInvalidDimension
+ CLBlastInvalidLeadDimA
+ CLBlastInvalidLeadDimB
+ CLBlastInvalidLeadDimC
+ CLBlastInvalidIncrementX
+ CLBlastInvalidIncrementY
+ CLBlastInsufficientMemoryA
+ CLBlastInsufficientMemoryB
+ CLBlastInsufficientMemoryC
+ CLBlastInsufficientMemoryX
+ CLBlastInsufficientMemoryY
+ CLBlastInvalidBatchCount
+ CLBlastInvalidOverrideKernel
+ CLBlastMissingOverrideParameter
+ CLBlastInvalidLocalMemUsage
+ CLBlastNoHalfPrecision
+ CLBlastNoDoublePrecision
+ CLBlastInvalidVectorScalar
+ CLBlastInsufficientMemoryScalar
+ CLBlastDatabaseError
+ CLBlastUnknownError
+ CLBlastUnexpectedError
+
+ # OpenCL data-types
+ ctypedef float cl_float
+ ctypedef double cl_double
+ ctypedef unsigned int cl_uint
+ ctypedef struct cl_float2:
+ cl_float x
+ cl_float y
+ ctypedef struct cl_double2:
+ cl_double x
+ cl_double y
+
+ # OpenCL special data-types
+ struct _cl_mem:
+ pass
+ struct _cl_command_queue:
+ pass
+ struct _cl_event:
+ pass
+ ctypedef _cl_mem* cl_mem
+ ctypedef _cl_command_queue* cl_command_queue
+ ctypedef _cl_event* cl_event
+
+ # Matrix layout and transpose types
+ ctypedef enum CLBlastLayout:
+ CLBlastLayoutRowMajor
+ CLBlastLayoutColMajor
+ ctypedef enum CLBlastTranspose:
+ CLBlastTransposeNo
+ CLBlastTransposeYes
+ CLBlastTransposeConjugate
+ ctypedef enum CLBlastTriangle:
+ CLBlastTriangleUpper
+ CLBlastTriangleLower
+ ctypedef enum CLBlastDiagonal:
+ CLBlastDiagonalNonUnit
+ CLBlastDiagonalUnit
+ ctypedef enum CLBlastSide:
+ CLBlastSideLeft
+ CLBlastSideRight
+
+ # Precision enum
+ ctypedef enum CLBlastPrecision:
+ CLBlastPrecisionSingle
+ CLBlastPrecisionDouble
+ CLBlastPrecisionComplexSingle
+ CLBlastPrecisionComplexDouble
+
+# Translates status codes into readable messages
+cdef get_status_message(CLBlastStatusCode status):
+ if status == CLBlastSuccess:
+ return "CLBlastSuccess"
+ if status == CLBlastOpenCLCompilerNotAvailable:
+ return "CLBlastOpenCLCompilerNotAvailable: CL_COMPILER_NOT_AVAILABLE"
+ if status == CLBlastTempBufferAllocFailure:
+ return "CLBlastTempBufferAllocFailure: CL_MEM_OBJECT_ALLOCATION_FAILURE"
+ if status == CLBlastOpenCLOutOfResources:
+ return "CLBlastOpenCLOutOfResources: CL_OUT_OF_RESOURCES"
+ if status == CLBlastOpenCLOutOfHostMemory:
+ return "CLBlastOpenCLOutOfHostMemory: CL_OUT_OF_HOST_MEMORY"
+ if status == CLBlastOpenCLBuildProgramFailure:
+ return "CLBlastOpenCLBuildProgramFailure: CL_BUILD_PROGRAM_FAILURE: OpenCL compilation error"
+ if status == CLBlastInvalidValue:
+ return "CLBlastInvalidValue: CL_INVALID_VALUE"
+ if status == CLBlastInvalidCommandQueue:
+ return "CLBlastInvalidCommandQueue: CL_INVALID_COMMAND_QUEUE"
+ if status == CLBlastInvalidMemObject:
+ return "CLBlastInvalidMemObject: CL_INVALID_MEM_OBJECT"
+ if status == CLBlastInvalidBinary:
+ return "CLBlastInvalidBinary: CL_INVALID_BINARY"
+ if status == CLBlastInvalidBuildOptions:
+ return "CLBlastInvalidBuildOptions: CL_INVALID_BUILD_OPTIONS"
+ if status == CLBlastInvalidProgram:
+ return "CLBlastInvalidProgram: CL_INVALID_PROGRAM"
+ if status == CLBlastInvalidProgramExecutable:
+ return "CLBlastInvalidProgramExecutable: CL_INVALID_PROGRAM_EXECUTABLE"
+ if status == CLBlastInvalidKernelName:
+ return "CLBlastInvalidKernelName: CL_INVALID_KERNEL_NAME"
+ if status == CLBlastInvalidKernelDefinition:
+ return "CLBlastInvalidKernelDefinition: CL_INVALID_KERNEL_DEFINITION"
+ if status == CLBlastInvalidKernel:
+ return "CLBlastInvalidKernel: CL_INVALID_KERNEL"
+ if status == CLBlastInvalidArgIndex:
+ return "CLBlastInvalidArgIndex: CL_INVALID_ARG_INDEX"
+ if status == CLBlastInvalidArgValue:
+ return "CLBlastInvalidArgValue: CL_INVALID_ARG_VALUE"
+ if status == CLBlastInvalidArgSize:
+ return "CLBlastInvalidArgSize: CL_INVALID_ARG_SIZE"
+ if status == CLBlastInvalidKernelArgs:
+ return "CLBlastInvalidKernelArgs: CL_INVALID_KERNEL_ARGS"
+ if status == CLBlastInvalidLocalNumDimensions:
+ return "CLBlastInvalidLocalNumDimensions: CL_INVALID_WORK_DIMENSION: Too many thread dimensions"
+ if status == CLBlastInvalidLocalThreadsTotal:
+ return "CLBlastInvalidLocalThreadsTotal: CL_INVALID_WORK_GROUP_SIZE: Too many threads in total"
+ if status == CLBlastInvalidLocalThreadsDim:
+ return "CLBlastInvalidLocalThreadsDim: CL_INVALID_WORK_ITEM_SIZE: ... or for a specific dimension"
+ if status == CLBlastInvalidGlobalOffset:
+ return "CLBlastInvalidGlobalOffset: CL_INVALID_GLOBAL_OFFSET"
+ if status == CLBlastInvalidEventWaitList:
+ return "CLBlastInvalidEventWaitList: CL_INVALID_EVENT_WAIT_LIST"
+ if status == CLBlastInvalidEvent:
+ return "CLBlastInvalidEvent: CL_INVALID_EVENT"
+ if status == CLBlastInvalidOperation:
+ return "CLBlastInvalidOperation: CL_INVALID_OPERATION"
+ if status == CLBlastInvalidBufferSize:
+ return "CLBlastInvalidBufferSize: CL_INVALID_BUFFER_SIZE"
+ if status == CLBlastInvalidGlobalWorkSize:
+ return "CLBlastInvalidGlobalWorkSize: CL_INVALID_GLOBAL_WORK_SIZE"
+ if status == CLBlastNotImplemented:
+ return "CLBlastNotImplemented: Routine or functionality not implemented yet"
+ if status == CLBlastInvalidMatrixA:
+ return "CLBlastInvalidMatrixA: Matrix A is not a valid OpenCL buffer"
+ if status == CLBlastInvalidMatrixB:
+ return "CLBlastInvalidMatrixB: Matrix B is not a valid OpenCL buffer"
+ if status == CLBlastInvalidMatrixC:
+ return "CLBlastInvalidMatrixC: Matrix C is not a valid OpenCL buffer"
+ if status == CLBlastInvalidVectorX:
+ return "CLBlastInvalidVectorX: Vector X is not a valid OpenCL buffer"
+ if status == CLBlastInvalidVectorY:
+ return "CLBlastInvalidVectorY: Vector Y is not a valid OpenCL buffer"
+ if status == CLBlastInvalidDimension:
+ return "CLBlastInvalidDimension: Dimensions M, N, and K have to be larger than zero"
+ if status == CLBlastInvalidLeadDimA:
+ return "CLBlastInvalidLeadDimA: LD of A is smaller than the matrix's first dimension"
+ if status == CLBlastInvalidLeadDimB:
+ return "CLBlastInvalidLeadDimB: LD of B is smaller than the matrix's first dimension"
+ if status == CLBlastInvalidLeadDimC:
+ return "CLBlastInvalidLeadDimC: LD of C is smaller than the matrix's first dimension"
+ if status == CLBlastInvalidIncrementX:
+ return "CLBlastInvalidIncrementX: Increment of vector X cannot be zero"
+ if status == CLBlastInvalidIncrementY:
+ return "CLBlastInvalidIncrementY: Increment of vector Y cannot be zero"
+ if status == CLBlastInsufficientMemoryA:
+ return "CLBlastInsufficientMemoryA: Matrix A's OpenCL buffer is too small"
+ if status == CLBlastInsufficientMemoryB:
+ return "CLBlastInsufficientMemoryB: Matrix B's OpenCL buffer is too small"
+ if status == CLBlastInsufficientMemoryC:
+ return "CLBlastInsufficientMemoryC: Matrix C's OpenCL buffer is too small"
+ if status == CLBlastInsufficientMemoryX:
+ return "CLBlastInsufficientMemoryX: Vector X's OpenCL buffer is too small"
+ if status == CLBlastInsufficientMemoryY:
+ return "CLBlastInsufficientMemoryY: Vector Y's OpenCL buffer is too small"
+ if status == CLBlastInvalidBatchCount:
+ return "CLBlastInvalidBatchCount: The batch count needs to be positive"
+ if status == CLBlastInvalidOverrideKernel:
+ return "CLBlastInvalidOverrideKernel: Trying to override parameters for an invalid kernel"
+ if status == CLBlastMissingOverrideParameter:
+ return "CLBlastMissingOverrideParameter: Missing override parameter(s) for the target kernel"
+ if status == CLBlastInvalidLocalMemUsage:
+ return "CLBlastInvalidLocalMemUsage: Not enough local memory available on this device"
+ if status == CLBlastNoHalfPrecision:
+ return "CLBlastNoHalfPrecision: Half precision (16-bits) not supported by the device"
+ if status == CLBlastNoDoublePrecision:
+ return "CLBlastNoDoublePrecision: Double precision (64-bits) not supported by the device"
+ if status == CLBlastInvalidVectorScalar:
+ return "CLBlastInvalidVectorScalar: The unit-sized vector is not a valid OpenCL buffer"
+ if status == CLBlastInsufficientMemoryScalar:
+ return "CLBlastInsufficientMemoryScalar: The unit-sized vector's OpenCL buffer is too small"
+ if status == CLBlastDatabaseError:
+ return "CLBlastDatabaseError: Entry for the device was not found in the database"
+ if status == CLBlastUnknownError:
+ return "CLBlastUnknownError: A catch-all error code representing an unspecified error"
+ if status == CLBlastUnexpectedError:
+ return "CLBlastUnexpectedError: A catch-all error code representing an unexpected exception"
+ return "PyCLBlast: unrecognized CLBlast status code (code %d)" % status
+
+####################################################################################################
+# Generic helpers
+####################################################################################################
+
+dtype_size = {np.dtype('float32'): 4,
+ np.dtype('float64'): 8,
+ np.dtype('complex64'): 8,
+ np.dtype('complex128'): 16}
+
+def dtypes_str(dtypes):
+ if len(dtypes) == 1:
+ return "'%s'" % dtypes[0]
+ return "one of %s" % dtypes
+
+
+def check_dtype(args, dtypes):
+ dtype = args[0].dtype
+ if not all(arg.dtype == dtype for arg in args):
+ raise ValueError("PyCLBlast: All arguments must have the same dtype (%s)" % dtypes_str(dtypes))
+ if dtype not in dtypes:
+ raise ValueError("PyCLBlast: Data type must be %s" % dtypes_str(dtypes))
+ return dtype
+
+
+def check_array(a, ndim, name):
+ if not isinstance(a, Array):
+ raise ValueError("PyCLBlast: '%s' must be a PyOpenCL Array" % name)
+ if not len(a.shape) == ndim:
+ raise ValueError("PyCLBlast: '%s' must have %d dimensions (got %d)" % (name, ndim, len(a.shape)))
+
+
+def check_matrix(a, name):
+ check_array(a, 2, name)
+
+
+def check_vector(a, name):
+ check_array(a, 1, name)
+
+
+####################################################################################################
+# Swap two vectors: SSWAP/DSWAP/CSWAP/ZSWAP/HSWAP
+####################################################################################################
+
+cdef extern from "clblast_c.h":
+ CLBlastStatusCode CLBlastSswap(const size_t n, cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastDswap(const size_t n, cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastCswap(const size_t n, cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastZswap(const size_t n, cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event)
+
+def swap(queue, n, x, y, x_inc = 1, y_inc = 1, x_offset = 0, y_offset = 0):
+ dtype = check_dtype([x, y], ["float32", "float64", "complex64", "complex128"])
+ check_vector(x, "x")
+ check_vector(y, "y")
+
+ cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr
+ cdef cl_mem y_buffer = <cl_mem><size_t>y.base_data.int_ptr
+
+ cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
+ cdef cl_event event = NULL
+
+ cdef CLBlastStatusCode err
+ if dtype == np.dtype("float32"):
+ err = CLBlastSswap(n, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event)
+ elif dtype == np.dtype("float64"):
+ err = CLBlastDswap(n, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event)
+ elif dtype == np.dtype("complex64"):
+ err = CLBlastCswap(n, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event)
+ elif dtype == np.dtype("complex128"):
+ err = CLBlastZswap(n, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event)
+ else:
+ raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
+ if err != CLBlastSuccess:
+ raise RuntimeError("PyCLBlast: 'CLBlastXswap' failed: %s" % get_status_message(err))
+ return cl.Event.from_int_ptr(<size_t>event)
+
+####################################################################################################
+# Vector scaling: SSCAL/DSCAL/CSCAL/ZSCAL/HSCAL
+####################################################################################################
+
+cdef extern from "clblast_c.h":
+ CLBlastStatusCode CLBlastSscal(const size_t n, const float alpha, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastDscal(const size_t n, const double alpha, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastCscal(const size_t n, const cl_float2 alpha, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastZscal(const size_t n, const cl_double2 alpha, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event)
+
+def scal(queue, n, x, x_inc = 1, alpha = 1.0, x_offset = 0):
+ dtype = check_dtype([x], ["float32", "float64", "complex64", "complex128"])
+ check_vector(x, "x")
+
+ cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr
+
+ cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
+ cdef cl_event event = NULL
+
+ cdef CLBlastStatusCode err
+ if dtype == np.dtype("float32"):
+ err = CLBlastSscal(n, <cl_float>alpha, x_buffer, x_offset, x_inc, &command_queue, &event)
+ elif dtype == np.dtype("float64"):
+ err = CLBlastDscal(n, <cl_double>alpha, x_buffer, x_offset, x_inc, &command_queue, &event)
+ elif dtype == np.dtype("complex64"):
+ err = CLBlastCscal(n, <cl_float2>cl_float2(x=alpha.real,y=alpha.imag), x_buffer, x_offset, x_inc, &command_queue, &event)
+ elif dtype == np.dtype("complex128"):
+ err = CLBlastZscal(n, <cl_double2>cl_double2(x=alpha.real,y=alpha.imag), x_buffer, x_offset, x_inc, &command_queue, &event)
+ else:
+ raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
+ if err != CLBlastSuccess:
+ raise RuntimeError("PyCLBlast: 'CLBlastXscal' failed: %s" % get_status_message(err))
+ return cl.Event.from_int_ptr(<size_t>event)
+
+####################################################################################################
+# Vector copy: SCOPY/DCOPY/CCOPY/ZCOPY/HCOPY
+####################################################################################################
+
+cdef extern from "clblast_c.h":
+ CLBlastStatusCode CLBlastScopy(const size_t n, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastDcopy(const size_t n, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastCcopy(const size_t n, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastZcopy(const size_t n, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event)
+
+def copy(queue, n, x, y, x_inc = 1, y_inc = 1, x_offset = 0, y_offset = 0):
+ dtype = check_dtype([x, y], ["float32", "float64", "complex64", "complex128"])
+ check_vector(x, "x")
+ check_vector(y, "y")
+
+ cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr
+ cdef cl_mem y_buffer = <cl_mem><size_t>y.base_data.int_ptr
+
+ cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
+ cdef cl_event event = NULL
+
+ cdef CLBlastStatusCode err
+ if dtype == np.dtype("float32"):
+ err = CLBlastScopy(n, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event)
+ elif dtype == np.dtype("float64"):
+ err = CLBlastDcopy(n, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event)
+ elif dtype == np.dtype("complex64"):
+ err = CLBlastCcopy(n, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event)
+ elif dtype == np.dtype("complex128"):
+ err = CLBlastZcopy(n, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event)
+ else:
+ raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
+ if err != CLBlastSuccess:
+ raise RuntimeError("PyCLBlast: 'CLBlastXcopy' failed: %s" % get_status_message(err))
+ return cl.Event.from_int_ptr(<size_t>event)
+
+####################################################################################################
+# Vector-times-constant plus vector: SAXPY/DAXPY/CAXPY/ZAXPY/HAXPY
+####################################################################################################
+
+cdef extern from "clblast_c.h":
+ CLBlastStatusCode CLBlastSaxpy(const size_t n, const float alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastDaxpy(const size_t n, const double alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastCaxpy(const size_t n, const cl_float2 alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastZaxpy(const size_t n, const cl_double2 alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event)
+
+def axpy(queue, n, x, y, x_inc = 1, y_inc = 1, alpha = 1.0, x_offset = 0, y_offset = 0):
+ dtype = check_dtype([x, y], ["float32", "float64", "complex64", "complex128"])
+ check_vector(x, "x")
+ check_vector(y, "y")
+
+ cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr
+ cdef cl_mem y_buffer = <cl_mem><size_t>y.base_data.int_ptr
+
+ cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
+ cdef cl_event event = NULL
+
+ cdef CLBlastStatusCode err
+ if dtype == np.dtype("float32"):
+ err = CLBlastSaxpy(n, <cl_float>alpha, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event)
+ elif dtype == np.dtype("float64"):
+ err = CLBlastDaxpy(n, <cl_double>alpha, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event)
+ elif dtype == np.dtype("complex64"):
+ err = CLBlastCaxpy(n, <cl_float2>cl_float2(x=alpha.real,y=alpha.imag), x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event)
+ elif dtype == np.dtype("complex128"):
+ err = CLBlastZaxpy(n, <cl_double2>cl_double2(x=alpha.real,y=alpha.imag), x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event)
+ else:
+ raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
+ if err != CLBlastSuccess:
+ raise RuntimeError("PyCLBlast: 'CLBlastXaxpy' failed: %s" % get_status_message(err))
+ return cl.Event.from_int_ptr(<size_t>event)
+
+####################################################################################################
+# Dot product of two vectors: SDOT/DDOT/HDOT
+####################################################################################################
+
+cdef extern from "clblast_c.h":
+ CLBlastStatusCode CLBlastSdot(const size_t n, cl_mem dot_buffer, const size_t dot_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastDdot(const size_t n, cl_mem dot_buffer, const size_t dot_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event)
+
+def dot(queue, n, x, y, dot, x_inc = 1, y_inc = 1, x_offset = 0, y_offset = 0, dot_offset = 0):
+ dtype = check_dtype([x, y, dot], ["float32", "float64"])
+ check_vector(x, "x")
+ check_vector(y, "y")
+ check_matrix(dot, "dot")
+
+ cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr
+ cdef cl_mem y_buffer = <cl_mem><size_t>y.base_data.int_ptr
+ cdef cl_mem dot_buffer = <cl_mem><size_t>dot.base_data.int_ptr
+
+ cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
+ cdef cl_event event = NULL
+
+ cdef CLBlastStatusCode err
+ if dtype == np.dtype("float32"):
+ err = CLBlastSdot(n, dot_buffer, dot_offset, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event)
+ elif dtype == np.dtype("float64"):
+ err = CLBlastDdot(n, dot_buffer, dot_offset, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event)
+ else:
+ raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
+ if err != CLBlastSuccess:
+ raise RuntimeError("PyCLBlast: 'CLBlastXdot' failed: %s" % get_status_message(err))
+ return cl.Event.from_int_ptr(<size_t>event)
+
+####################################################################################################
+# Dot product of two complex vectors: CDOTU/ZDOTU
+####################################################################################################
+
+cdef extern from "clblast_c.h":
+ CLBlastStatusCode CLBlastCdotu(const size_t n, cl_mem dot_buffer, const size_t dot_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastZdotu(const size_t n, cl_mem dot_buffer, const size_t dot_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event)
+
+def dotu(queue, n, x, y, dot, x_inc = 1, y_inc = 1, x_offset = 0, y_offset = 0, dot_offset = 0):
+ dtype = check_dtype([x, y, dot], ["complex64", "complex128"])
+ check_vector(x, "x")
+ check_vector(y, "y")
+ check_matrix(dot, "dot")
+
+ cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr
+ cdef cl_mem y_buffer = <cl_mem><size_t>y.base_data.int_ptr
+ cdef cl_mem dot_buffer = <cl_mem><size_t>dot.base_data.int_ptr
+
+ cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
+ cdef cl_event event = NULL
+
+ cdef CLBlastStatusCode err
+ if dtype == np.dtype("complex64"):
+ err = CLBlastCdotu(n, dot_buffer, dot_offset, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event)
+ elif dtype == np.dtype("complex128"):
+ err = CLBlastZdotu(n, dot_buffer, dot_offset, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event)
+ else:
+ raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
+ if err != CLBlastSuccess:
+ raise RuntimeError("PyCLBlast: 'CLBlastXdotu' failed: %s" % get_status_message(err))
+ return cl.Event.from_int_ptr(<size_t>event)
+
+####################################################################################################
+# Dot product of two complex vectors, one conjugated: CDOTC/ZDOTC
+####################################################################################################
+
+cdef extern from "clblast_c.h":
+ CLBlastStatusCode CLBlastCdotc(const size_t n, cl_mem dot_buffer, const size_t dot_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastZdotc(const size_t n, cl_mem dot_buffer, const size_t dot_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event)
+
+def dotc(queue, n, x, y, dot, x_inc = 1, y_inc = 1, x_offset = 0, y_offset = 0, dot_offset = 0):
+ dtype = check_dtype([x, y, dot], ["complex64", "complex128"])
+ check_vector(x, "x")
+ check_vector(y, "y")
+ check_matrix(dot, "dot")
+
+ cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr
+ cdef cl_mem y_buffer = <cl_mem><size_t>y.base_data.int_ptr
+ cdef cl_mem dot_buffer = <cl_mem><size_t>dot.base_data.int_ptr
+
+ cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
+ cdef cl_event event = NULL
+
+ cdef CLBlastStatusCode err
+ if dtype == np.dtype("complex64"):
+ err = CLBlastCdotc(n, dot_buffer, dot_offset, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event)
+ elif dtype == np.dtype("complex128"):
+ err = CLBlastZdotc(n, dot_buffer, dot_offset, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event)
+ else:
+ raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
+ if err != CLBlastSuccess:
+ raise RuntimeError("PyCLBlast: 'CLBlastXdotc' failed: %s" % get_status_message(err))
+ return cl.Event.from_int_ptr(<size_t>event)
+
+####################################################################################################
+# Euclidian norm of a vector: SNRM2/DNRM2/ScNRM2/DzNRM2/HNRM2
+####################################################################################################
+
+cdef extern from "clblast_c.h":
+ CLBlastStatusCode CLBlastSnrm2(const size_t n, cl_mem nrm2_buffer, const size_t nrm2_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastDnrm2(const size_t n, cl_mem nrm2_buffer, const size_t nrm2_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastScnrm2(const size_t n, cl_mem nrm2_buffer, const size_t nrm2_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastDznrm2(const size_t n, cl_mem nrm2_buffer, const size_t nrm2_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event)
+
+def nrm2(queue, n, x, nrm2, x_inc = 1, x_offset = 0, nrm2_offset = 0):
+ dtype = check_dtype([x, nrm2], ["float32", "float64", "complex64", "complex128"])
+ check_vector(x, "x")
+ check_matrix(nrm2, "nrm2")
+
+ cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr
+ cdef cl_mem nrm2_buffer = <cl_mem><size_t>nrm2.base_data.int_ptr
+
+ cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
+ cdef cl_event event = NULL
+
+ cdef CLBlastStatusCode err
+ if dtype == np.dtype("float32"):
+ err = CLBlastSnrm2(n, nrm2_buffer, nrm2_offset, x_buffer, x_offset, x_inc, &command_queue, &event)
+ elif dtype == np.dtype("float64"):
+ err = CLBlastDnrm2(n, nrm2_buffer, nrm2_offset, x_buffer, x_offset, x_inc, &command_queue, &event)
+ elif dtype == np.dtype("complex64"):
+ err = CLBlastScnrm2(n, nrm2_buffer, nrm2_offset, x_buffer, x_offset, x_inc, &command_queue, &event)
+ elif dtype == np.dtype("complex128"):
+ err = CLBlastDznrm2(n, nrm2_buffer, nrm2_offset, x_buffer, x_offset, x_inc, &command_queue, &event)
+ else:
+ raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
+ if err != CLBlastSuccess:
+ raise RuntimeError("PyCLBlast: 'CLBlastXnrm2' failed: %s" % get_status_message(err))
+ return cl.Event.from_int_ptr(<size_t>event)
+
+####################################################################################################
+# Absolute sum of values in a vector: SASUM/DASUM/ScASUM/DzASUM/HASUM
+####################################################################################################
+
+cdef extern from "clblast_c.h":
+ CLBlastStatusCode CLBlastSasum(const size_t n, cl_mem asum_buffer, const size_t asum_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastDasum(const size_t n, cl_mem asum_buffer, const size_t asum_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastScasum(const size_t n, cl_mem asum_buffer, const size_t asum_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastDzasum(const size_t n, cl_mem asum_buffer, const size_t asum_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event)
+
+def asum(queue, n, x, asum, x_inc = 1, x_offset = 0, asum_offset = 0):
+ dtype = check_dtype([x, asum], ["float32", "float64", "complex64", "complex128"])
+ check_vector(x, "x")
+ check_matrix(asum, "asum")
+
+ cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr
+ cdef cl_mem asum_buffer = <cl_mem><size_t>asum.base_data.int_ptr
+
+ cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
+ cdef cl_event event = NULL
+
+ cdef CLBlastStatusCode err
+ if dtype == np.dtype("float32"):
+ err = CLBlastSasum(n, asum_buffer, asum_offset, x_buffer, x_offset, x_inc, &command_queue, &event)
+ elif dtype == np.dtype("float64"):
+ err = CLBlastDasum(n, asum_buffer, asum_offset, x_buffer, x_offset, x_inc, &command_queue, &event)
+ elif dtype == np.dtype("complex64"):
+ err = CLBlastScasum(n, asum_buffer, asum_offset, x_buffer, x_offset, x_inc, &command_queue, &event)
+ elif dtype == np.dtype("complex128"):
+ err = CLBlastDzasum(n, asum_buffer, asum_offset, x_buffer, x_offset, x_inc, &command_queue, &event)
+ else:
+ raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
+ if err != CLBlastSuccess:
+ raise RuntimeError("PyCLBlast: 'CLBlastXasum' failed: %s" % get_status_message(err))
+ return cl.Event.from_int_ptr(<size_t>event)
+
+####################################################################################################
+# Sum of values in a vector (non-BLAS function): SSUM/DSUM/ScSUM/DzSUM/HSUM
+####################################################################################################
+
+cdef extern from "clblast_c.h":
+ CLBlastStatusCode CLBlastSsum(const size_t n, cl_mem sum_buffer, const size_t sum_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastDsum(const size_t n, cl_mem sum_buffer, const size_t sum_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastScsum(const size_t n, cl_mem sum_buffer, const size_t sum_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastDzsum(const size_t n, cl_mem sum_buffer, const size_t sum_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event)
+
+def sum(queue, n, x, sum, x_inc = 1, x_offset = 0, sum_offset = 0):
+ dtype = check_dtype([x, sum], ["float32", "float64", "complex64", "complex128"])
+ check_vector(x, "x")
+ check_matrix(sum, "sum")
+
+ cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr
+ cdef cl_mem sum_buffer = <cl_mem><size_t>sum.base_data.int_ptr
+
+ cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
+ cdef cl_event event = NULL
+
+ cdef CLBlastStatusCode err
+ if dtype == np.dtype("float32"):
+ err = CLBlastSsum(n, sum_buffer, sum_offset, x_buffer, x_offset, x_inc, &command_queue, &event)
+ elif dtype == np.dtype("float64"):
+ err = CLBlastDsum(n, sum_buffer, sum_offset, x_buffer, x_offset, x_inc, &command_queue, &event)
+ elif dtype == np.dtype("complex64"):
+ err = CLBlastScsum(n, sum_buffer, sum_offset, x_buffer, x_offset, x_inc, &command_queue, &event)
+ elif dtype == np.dtype("complex128"):
+ err = CLBlastDzsum(n, sum_buffer, sum_offset, x_buffer, x_offset, x_inc, &command_queue, &event)
+ else:
+ raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
+ if err != CLBlastSuccess:
+ raise RuntimeError("PyCLBlast: 'CLBlastXsum' failed: %s" % get_status_message(err))
+ return cl.Event.from_int_ptr(<size_t>event)
+
+####################################################################################################
+# Index of absolute maximum value in a vector: iSAMAX/iDAMAX/iCAMAX/iZAMAX/iHAMAX
+####################################################################################################
+
+cdef extern from "clblast_c.h":
+ CLBlastStatusCode CLBlastiSamax(const size_t n, cl_mem imax_buffer, const size_t imax_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastiDamax(const size_t n, cl_mem imax_buffer, const size_t imax_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastiCamax(const size_t n, cl_mem imax_buffer, const size_t imax_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastiZamax(const size_t n, cl_mem imax_buffer, const size_t imax_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event)
+
+def amax(queue, n, x, imax, x_inc = 1, x_offset = 0, imax_offset = 0):
+ dtype = check_dtype([x, imax], ["float32", "float64", "complex64", "complex128"])
+ check_vector(x, "x")
+ check_matrix(imax, "imax")
+
+ cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr
+ cdef cl_mem imax_buffer = <cl_mem><size_t>imax.base_data.int_ptr
+
+ cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
+ cdef cl_event event = NULL
+
+ cdef CLBlastStatusCode err
+ if dtype == np.dtype("float32"):
+ err = CLBlastiSamax(n, imax_buffer, imax_offset, x_buffer, x_offset, x_inc, &command_queue, &event)
+ elif dtype == np.dtype("float64"):
+ err = CLBlastiDamax(n, imax_buffer, imax_offset, x_buffer, x_offset, x_inc, &command_queue, &event)
+ elif dtype == np.dtype("complex64"):
+ err = CLBlastiCamax(n, imax_buffer, imax_offset, x_buffer, x_offset, x_inc, &command_queue, &event)
+ elif dtype == np.dtype("complex128"):
+ err = CLBlastiZamax(n, imax_buffer, imax_offset, x_buffer, x_offset, x_inc, &command_queue, &event)
+ else:
+ raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
+ if err != CLBlastSuccess:
+ raise RuntimeError("PyCLBlast: 'CLBlastXamax' failed: %s" % get_status_message(err))
+ return cl.Event.from_int_ptr(<size_t>event)
+
+####################################################################################################
+# Index of absolute minimum value in a vector (non-BLAS function): iSAMIN/iDAMIN/iCAMIN/iZAMIN/iHAMIN
+####################################################################################################
+
+cdef extern from "clblast_c.h":
+ CLBlastStatusCode CLBlastiSamin(const size_t n, cl_mem imin_buffer, const size_t imin_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastiDamin(const size_t n, cl_mem imin_buffer, const size_t imin_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastiCamin(const size_t n, cl_mem imin_buffer, const size_t imin_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastiZamin(const size_t n, cl_mem imin_buffer, const size_t imin_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event)
+
+def amin(queue, n, x, imin, x_inc = 1, x_offset = 0, imin_offset = 0):
+ dtype = check_dtype([x, imin], ["float32", "float64", "complex64", "complex128"])
+ check_vector(x, "x")
+ check_matrix(imin, "imin")
+
+ cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr
+ cdef cl_mem imin_buffer = <cl_mem><size_t>imin.base_data.int_ptr
+
+ cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
+ cdef cl_event event = NULL
+
+ cdef CLBlastStatusCode err
+ if dtype == np.dtype("float32"):
+ err = CLBlastiSamin(n, imin_buffer, imin_offset, x_buffer, x_offset, x_inc, &command_queue, &event)
+ elif dtype == np.dtype("float64"):
+ err = CLBlastiDamin(n, imin_buffer, imin_offset, x_buffer, x_offset, x_inc, &command_queue, &event)
+ elif dtype == np.dtype("complex64"):
+ err = CLBlastiCamin(n, imin_buffer, imin_offset, x_buffer, x_offset, x_inc, &command_queue, &event)
+ elif dtype == np.dtype("complex128"):
+ err = CLBlastiZamin(n, imin_buffer, imin_offset, x_buffer, x_offset, x_inc, &command_queue, &event)
+ else:
+ raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
+ if err != CLBlastSuccess:
+ raise RuntimeError("PyCLBlast: 'CLBlastXamin' failed: %s" % get_status_message(err))
+ return cl.Event.from_int_ptr(<size_t>event)
+
+####################################################################################################
+# Index of maximum value in a vector (non-BLAS function): iSMAX/iDMAX/iCMAX/iZMAX/iHMAX
+####################################################################################################
+
+cdef extern from "clblast_c.h":
+ CLBlastStatusCode CLBlastiSmax(const size_t n, cl_mem imax_buffer, const size_t imax_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastiDmax(const size_t n, cl_mem imax_buffer, const size_t imax_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastiCmax(const size_t n, cl_mem imax_buffer, const size_t imax_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastiZmax(const size_t n, cl_mem imax_buffer, const size_t imax_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event)
+
+def max(queue, n, x, imax, x_inc = 1, x_offset = 0, imax_offset = 0):
+ dtype = check_dtype([x, imax], ["float32", "float64", "complex64", "complex128"])
+ check_vector(x, "x")
+ check_matrix(imax, "imax")
+
+ cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr
+ cdef cl_mem imax_buffer = <cl_mem><size_t>imax.base_data.int_ptr
+
+ cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
+ cdef cl_event event = NULL
+
+ cdef CLBlastStatusCode err
+ if dtype == np.dtype("float32"):
+ err = CLBlastiSmax(n, imax_buffer, imax_offset, x_buffer, x_offset, x_inc, &command_queue, &event)
+ elif dtype == np.dtype("float64"):
+ err = CLBlastiDmax(n, imax_buffer, imax_offset, x_buffer, x_offset, x_inc, &command_queue, &event)
+ elif dtype == np.dtype("complex64"):
+ err = CLBlastiCmax(n, imax_buffer, imax_offset, x_buffer, x_offset, x_inc, &command_queue, &event)
+ elif dtype == np.dtype("complex128"):
+ err = CLBlastiZmax(n, imax_buffer, imax_offset, x_buffer, x_offset, x_inc, &command_queue, &event)
+ else:
+ raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
+ if err != CLBlastSuccess:
+ raise RuntimeError("PyCLBlast: 'CLBlastXmax' failed: %s" % get_status_message(err))
+ return cl.Event.from_int_ptr(<size_t>event)
+
+####################################################################################################
+# Index of minimum value in a vector (non-BLAS function): iSMIN/iDMIN/iCMIN/iZMIN/iHMIN
+####################################################################################################
+
+cdef extern from "clblast_c.h":
+ CLBlastStatusCode CLBlastiSmin(const size_t n, cl_mem imin_buffer, const size_t imin_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastiDmin(const size_t n, cl_mem imin_buffer, const size_t imin_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastiCmin(const size_t n, cl_mem imin_buffer, const size_t imin_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastiZmin(const size_t n, cl_mem imin_buffer, const size_t imin_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event)
+
+def min(queue, n, x, imin, x_inc = 1, x_offset = 0, imin_offset = 0):
+ dtype = check_dtype([x, imin], ["float32", "float64", "complex64", "complex128"])
+ check_vector(x, "x")
+ check_matrix(imin, "imin")
+
+ cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr
+ cdef cl_mem imin_buffer = <cl_mem><size_t>imin.base_data.int_ptr
+
+ cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
+ cdef cl_event event = NULL
+
+ cdef CLBlastStatusCode err
+ if dtype == np.dtype("float32"):
+ err = CLBlastiSmin(n, imin_buffer, imin_offset, x_buffer, x_offset, x_inc, &command_queue, &event)
+ elif dtype == np.dtype("float64"):
+ err = CLBlastiDmin(n, imin_buffer, imin_offset, x_buffer, x_offset, x_inc, &command_queue, &event)
+ elif dtype == np.dtype("complex64"):
+ err = CLBlastiCmin(n, imin_buffer, imin_offset, x_buffer, x_offset, x_inc, &command_queue, &event)
+ elif dtype == np.dtype("complex128"):
+ err = CLBlastiZmin(n, imin_buffer, imin_offset, x_buffer, x_offset, x_inc, &command_queue, &event)
+ else:
+ raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
+ if err != CLBlastSuccess:
+ raise RuntimeError("PyCLBlast: 'CLBlastXmin' failed: %s" % get_status_message(err))
+ return cl.Event.from_int_ptr(<size_t>event)
+
+####################################################################################################
+# General matrix-vector multiplication: SGEMV/DGEMV/CGEMV/ZGEMV/HGEMV
+####################################################################################################
+
+cdef extern from "clblast_c.h":
+ CLBlastStatusCode CLBlastSgemv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const size_t m, const size_t n, const float alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const float beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastDgemv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const size_t m, const size_t n, const double alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const double beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastCgemv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const size_t m, const size_t n, const cl_float2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_float2 beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastZgemv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const size_t m, const size_t n, const cl_double2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_double2 beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event)
+
+def gemv(queue, m, n, a, x, y, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, beta = 0.0, a_transp = False, a_offset = 0, x_offset = 0, y_offset = 0):
+ dtype = check_dtype([a, x, y], ["float32", "float64", "complex64", "complex128"])
+ check_matrix(a, "a")
+ check_vector(x, "x")
+ check_vector(y, "y")
+
+ cdef cl_mem a_buffer = <cl_mem><size_t>a.base_data.int_ptr
+ cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr
+ cdef cl_mem y_buffer = <cl_mem><size_t>y.base_data.int_ptr
+
+ cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
+ cdef cl_event event = NULL
+ a_transpose = CLBlastTransposeYes if a_transp else CLBlastTransposeNo
+
+ cdef CLBlastStatusCode err
+ if dtype == np.dtype("float32"):
+ err = CLBlastSgemv(CLBlastLayoutRowMajor, a_transpose, m, n, <cl_float>alpha, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, <cl_float>beta, y_buffer, y_offset, y_inc, &command_queue, &event)
+ elif dtype == np.dtype("float64"):
+ err = CLBlastDgemv(CLBlastLayoutRowMajor, a_transpose, m, n, <cl_double>alpha, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, <cl_double>beta, y_buffer, y_offset, y_inc, &command_queue, &event)
+ elif dtype == np.dtype("complex64"):
+ err = CLBlastCgemv(CLBlastLayoutRowMajor, a_transpose, m, n, <cl_float2>cl_float2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, <cl_float2>cl_float2(x=beta.real,y=beta.imag), y_buffer, y_offset, y_inc, &command_queue, &event)
+ elif dtype == np.dtype("complex128"):
+ err = CLBlastZgemv(CLBlastLayoutRowMajor, a_transpose, m, n, <cl_double2>cl_double2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, <cl_double2>cl_double2(x=beta.real,y=beta.imag), y_buffer, y_offset, y_inc, &command_queue, &event)
+ else:
+ raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
+ if err != CLBlastSuccess:
+ raise RuntimeError("PyCLBlast: 'CLBlastXgemv' failed: %s" % get_status_message(err))
+ return cl.Event.from_int_ptr(<size_t>event)
+
+####################################################################################################
+# General banded matrix-vector multiplication: SGBMV/DGBMV/CGBMV/ZGBMV/HGBMV
+####################################################################################################
+
+cdef extern from "clblast_c.h":
+ CLBlastStatusCode CLBlastSgbmv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const size_t m, const size_t n, const size_t kl, const size_t ku, const float alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const float beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastDgbmv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const size_t m, const size_t n, const size_t kl, const size_t ku, const double alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const double beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastCgbmv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const size_t m, const size_t n, const size_t kl, const size_t ku, const cl_float2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_float2 beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastZgbmv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const size_t m, const size_t n, const size_t kl, const size_t ku, const cl_double2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_double2 beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event)
+
+def gbmv(queue, m, n, kl, ku, a, x, y, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, beta = 0.0, a_transp = False, a_offset = 0, x_offset = 0, y_offset = 0):
+ dtype = check_dtype([a, x, y], ["float32", "float64", "complex64", "complex128"])
+ check_matrix(a, "a")
+ check_vector(x, "x")
+ check_vector(y, "y")
+
+ cdef cl_mem a_buffer = <cl_mem><size_t>a.base_data.int_ptr
+ cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr
+ cdef cl_mem y_buffer = <cl_mem><size_t>y.base_data.int_ptr
+
+ cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
+ cdef cl_event event = NULL
+ a_transpose = CLBlastTransposeYes if a_transp else CLBlastTransposeNo
+
+ cdef CLBlastStatusCode err
+ if dtype == np.dtype("float32"):
+ err = CLBlastSgbmv(CLBlastLayoutRowMajor, a_transpose, m, n, kl, ku, <cl_float>alpha, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, <cl_float>beta, y_buffer, y_offset, y_inc, &command_queue, &event)
+ elif dtype == np.dtype("float64"):
+ err = CLBlastDgbmv(CLBlastLayoutRowMajor, a_transpose, m, n, kl, ku, <cl_double>alpha, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, <cl_double>beta, y_buffer, y_offset, y_inc, &command_queue, &event)
+ elif dtype == np.dtype("complex64"):
+ err = CLBlastCgbmv(CLBlastLayoutRowMajor, a_transpose, m, n, kl, ku, <cl_float2>cl_float2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, <cl_float2>cl_float2(x=beta.real,y=beta.imag), y_buffer, y_offset, y_inc, &command_queue, &event)
+ elif dtype == np.dtype("complex128"):
+ err = CLBlastZgbmv(CLBlastLayoutRowMajor, a_transpose, m, n, kl, ku, <cl_double2>cl_double2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, <cl_double2>cl_double2(x=beta.real,y=beta.imag), y_buffer, y_offset, y_inc, &command_queue, &event)
+ else:
+ raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
+ if err != CLBlastSuccess:
+ raise RuntimeError("PyCLBlast: 'CLBlastXgbmv' failed: %s" % get_status_message(err))
+ return cl.Event.from_int_ptr(<size_t>event)
+
+####################################################################################################
+# Hermitian matrix-vector multiplication: CHEMV/ZHEMV
+####################################################################################################
+
+cdef extern from "clblast_c.h":
+ CLBlastStatusCode CLBlastChemv(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const cl_float2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_float2 beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastZhemv(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const cl_double2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_double2 beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event)
+
+def hemv(queue, n, a, x, y, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, beta = 0.0, lower_triangle = False, a_offset = 0, x_offset = 0, y_offset = 0):
+ dtype = check_dtype([a, x, y], ["complex64", "complex128"])
+ check_matrix(a, "a")
+ check_vector(x, "x")
+ check_vector(y, "y")
+
+ cdef cl_mem a_buffer = <cl_mem><size_t>a.base_data.int_ptr
+ cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr
+ cdef cl_mem y_buffer = <cl_mem><size_t>y.base_data.int_ptr
+
+ cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
+ cdef cl_event event = NULL
+ triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper
+
+ cdef CLBlastStatusCode err
+ if dtype == np.dtype("complex64"):
+ err = CLBlastChemv(CLBlastLayoutRowMajor, triangle, n, <cl_float2>cl_float2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, <cl_float2>cl_float2(x=beta.real,y=beta.imag), y_buffer, y_offset, y_inc, &command_queue, &event)
+ elif dtype == np.dtype("complex128"):
+ err = CLBlastZhemv(CLBlastLayoutRowMajor, triangle, n, <cl_double2>cl_double2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, <cl_double2>cl_double2(x=beta.real,y=beta.imag), y_buffer, y_offset, y_inc, &command_queue, &event)
+ else:
+ raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
+ if err != CLBlastSuccess:
+ raise RuntimeError("PyCLBlast: 'CLBlastXhemv' failed: %s" % get_status_message(err))
+ return cl.Event.from_int_ptr(<size_t>event)
+
+####################################################################################################
+# Hermitian banded matrix-vector multiplication: CHBMV/ZHBMV
+####################################################################################################
+
+cdef extern from "clblast_c.h":
+ CLBlastStatusCode CLBlastChbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const size_t k, const cl_float2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_float2 beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastZhbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const size_t k, const cl_double2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_double2 beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event)
+
+def hbmv(queue, n, k, a, x, y, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, beta = 0.0, lower_triangle = False, a_offset = 0, x_offset = 0, y_offset = 0):
+ dtype = check_dtype([a, x, y], ["complex64", "complex128"])
+ check_matrix(a, "a")
+ check_vector(x, "x")
+ check_vector(y, "y")
+
+ cdef cl_mem a_buffer = <cl_mem><size_t>a.base_data.int_ptr
+ cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr
+ cdef cl_mem y_buffer = <cl_mem><size_t>y.base_data.int_ptr
+
+ cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
+ cdef cl_event event = NULL
+ triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper
+
+ cdef CLBlastStatusCode err
+ if dtype == np.dtype("complex64"):
+ err = CLBlastChbmv(CLBlastLayoutRowMajor, triangle, n, k, <cl_float2>cl_float2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, <cl_float2>cl_float2(x=beta.real,y=beta.imag), y_buffer, y_offset, y_inc, &command_queue, &event)
+ elif dtype == np.dtype("complex128"):
+ err = CLBlastZhbmv(CLBlastLayoutRowMajor, triangle, n, k, <cl_double2>cl_double2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, <cl_double2>cl_double2(x=beta.real,y=beta.imag), y_buffer, y_offset, y_inc, &command_queue, &event)
+ else:
+ raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
+ if err != CLBlastSuccess:
+ raise RuntimeError("PyCLBlast: 'CLBlastXhbmv' failed: %s" % get_status_message(err))
+ return cl.Event.from_int_ptr(<size_t>event)
+
+####################################################################################################
+# Hermitian packed matrix-vector multiplication: CHPMV/ZHPMV
+####################################################################################################
+
+cdef extern from "clblast_c.h":
+ CLBlastStatusCode CLBlastChpmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const cl_float2 alpha, const cl_mem ap_buffer, const size_t ap_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_float2 beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastZhpmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const cl_double2 alpha, const cl_mem ap_buffer, const size_t ap_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_double2 beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event)
+
+def hpmv(queue, n, ap, x, y, ap_ld, x_inc = 1, y_inc = 1, alpha = 1.0, beta = 0.0, lower_triangle = False, ap_offset = 0, x_offset = 0, y_offset = 0):
+ dtype = check_dtype([ap, x, y], ["complex64", "complex128"])
+ check_matrix(ap, "ap")
+ check_vector(x, "x")
+ check_vector(y, "y")
+
+ cdef cl_mem ap_buffer = <cl_mem><size_t>ap.base_data.int_ptr
+ cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr
+ cdef cl_mem y_buffer = <cl_mem><size_t>y.base_data.int_ptr
+
+ cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
+ cdef cl_event event = NULL
+ triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper
+
+ cdef CLBlastStatusCode err
+ if dtype == np.dtype("complex64"):
+ err = CLBlastChpmv(CLBlastLayoutRowMajor, triangle, n, <cl_float2>cl_float2(x=alpha.real,y=alpha.imag), ap_buffer, ap_offset, x_buffer, x_offset, x_inc, <cl_float2>cl_float2(x=beta.real,y=beta.imag), y_buffer, y_offset, y_inc, &command_queue, &event)
+ elif dtype == np.dtype("complex128"):
+ err = CLBlastZhpmv(CLBlastLayoutRowMajor, triangle, n, <cl_double2>cl_double2(x=alpha.real,y=alpha.imag), ap_buffer, ap_offset, x_buffer, x_offset, x_inc, <cl_double2>cl_double2(x=beta.real,y=beta.imag), y_buffer, y_offset, y_inc, &command_queue, &event)
+ else:
+ raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
+ if err != CLBlastSuccess:
+ raise RuntimeError("PyCLBlast: 'CLBlastXhpmv' failed: %s" % get_status_message(err))
+ return cl.Event.from_int_ptr(<size_t>event)
+
+####################################################################################################
+# Symmetric matrix-vector multiplication: SSYMV/DSYMV/HSYMV
+####################################################################################################
+
+cdef extern from "clblast_c.h":
+ CLBlastStatusCode CLBlastSsymv(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const float alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const float beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastDsymv(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const double alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const double beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event)
+
+def symv(queue, n, a, x, y, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, beta = 0.0, lower_triangle = False, a_offset = 0, x_offset = 0, y_offset = 0):
+ dtype = check_dtype([a, x, y], ["float32", "float64"])
+ check_matrix(a, "a")
+ check_vector(x, "x")
+ check_vector(y, "y")
+
+ cdef cl_mem a_buffer = <cl_mem><size_t>a.base_data.int_ptr
+ cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr
+ cdef cl_mem y_buffer = <cl_mem><size_t>y.base_data.int_ptr
+
+ cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
+ cdef cl_event event = NULL
+ triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper
+
+ cdef CLBlastStatusCode err
+ if dtype == np.dtype("float32"):
+ err = CLBlastSsymv(CLBlastLayoutRowMajor, triangle, n, <cl_float>alpha, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, <cl_float>beta, y_buffer, y_offset, y_inc, &command_queue, &event)
+ elif dtype == np.dtype("float64"):
+ err = CLBlastDsymv(CLBlastLayoutRowMajor, triangle, n, <cl_double>alpha, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, <cl_double>beta, y_buffer, y_offset, y_inc, &command_queue, &event)
+ else:
+ raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
+ if err != CLBlastSuccess:
+ raise RuntimeError("PyCLBlast: 'CLBlastXsymv' failed: %s" % get_status_message(err))
+ return cl.Event.from_int_ptr(<size_t>event)
+
+####################################################################################################
+# Symmetric banded matrix-vector multiplication: SSBMV/DSBMV/HSBMV
+####################################################################################################
+
+cdef extern from "clblast_c.h":
+ CLBlastStatusCode CLBlastSsbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const size_t k, const float alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const float beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastDsbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const size_t k, const double alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const double beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event)
+
+def sbmv(queue, n, k, a, x, y, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, beta = 0.0, lower_triangle = False, a_offset = 0, x_offset = 0, y_offset = 0):
+ dtype = check_dtype([a, x, y], ["float32", "float64"])
+ check_matrix(a, "a")
+ check_vector(x, "x")
+ check_vector(y, "y")
+
+ cdef cl_mem a_buffer = <cl_mem><size_t>a.base_data.int_ptr
+ cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr
+ cdef cl_mem y_buffer = <cl_mem><size_t>y.base_data.int_ptr
+
+ cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
+ cdef cl_event event = NULL
+ triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper
+
+ cdef CLBlastStatusCode err
+ if dtype == np.dtype("float32"):
+ err = CLBlastSsbmv(CLBlastLayoutRowMajor, triangle, n, k, <cl_float>alpha, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, <cl_float>beta, y_buffer, y_offset, y_inc, &command_queue, &event)
+ elif dtype == np.dtype("float64"):
+ err = CLBlastDsbmv(CLBlastLayoutRowMajor, triangle, n, k, <cl_double>alpha, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, <cl_double>beta, y_buffer, y_offset, y_inc, &command_queue, &event)
+ else:
+ raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
+ if err != CLBlastSuccess:
+ raise RuntimeError("PyCLBlast: 'CLBlastXsbmv' failed: %s" % get_status_message(err))
+ return cl.Event.from_int_ptr(<size_t>event)
+
+####################################################################################################
+# Symmetric packed matrix-vector multiplication: SSPMV/DSPMV/HSPMV
+####################################################################################################
+
+cdef extern from "clblast_c.h":
+ CLBlastStatusCode CLBlastSspmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const float alpha, const cl_mem ap_buffer, const size_t ap_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const float beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastDspmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const double alpha, const cl_mem ap_buffer, const size_t ap_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const double beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event)
+
+def spmv(queue, n, ap, x, y, ap_ld, x_inc = 1, y_inc = 1, alpha = 1.0, beta = 0.0, lower_triangle = False, ap_offset = 0, x_offset = 0, y_offset = 0):
+ dtype = check_dtype([ap, x, y], ["float32", "float64"])
+ check_matrix(ap, "ap")
+ check_vector(x, "x")
+ check_vector(y, "y")
+
+ cdef cl_mem ap_buffer = <cl_mem><size_t>ap.base_data.int_ptr
+ cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr
+ cdef cl_mem y_buffer = <cl_mem><size_t>y.base_data.int_ptr
+
+ cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
+ cdef cl_event event = NULL
+ triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper
+
+ cdef CLBlastStatusCode err
+ if dtype == np.dtype("float32"):
+ err = CLBlastSspmv(CLBlastLayoutRowMajor, triangle, n, <cl_float>alpha, ap_buffer, ap_offset, x_buffer, x_offset, x_inc, <cl_float>beta, y_buffer, y_offset, y_inc, &command_queue, &event)
+ elif dtype == np.dtype("float64"):
+ err = CLBlastDspmv(CLBlastLayoutRowMajor, triangle, n, <cl_double>alpha, ap_buffer, ap_offset, x_buffer, x_offset, x_inc, <cl_double>beta, y_buffer, y_offset, y_inc, &command_queue, &event)
+ else:
+ raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
+ if err != CLBlastSuccess:
+ raise RuntimeError("PyCLBlast: 'CLBlastXspmv' failed: %s" % get_status_message(err))
+ return cl.Event.from_int_ptr(<size_t>event)
+
+####################################################################################################
+# Triangular matrix-vector multiplication: STRMV/DTRMV/CTRMV/ZTRMV/HTRMV
+####################################################################################################
+
+cdef extern from "clblast_c.h":
+ CLBlastStatusCode CLBlastStrmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastDtrmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastCtrmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastZtrmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event)
+
+def trmv(queue, n, a, x, a_ld, x_inc = 1, lower_triangle = False, a_transp = False, unit_diagonal = False, a_offset = 0, x_offset = 0):
+ dtype = check_dtype([a, x], ["float32", "float64", "complex64", "complex128"])
+ check_matrix(a, "a")
+ check_vector(x, "x")
+
+ cdef cl_mem a_buffer = <cl_mem><size_t>a.base_data.int_ptr
+ cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr
+
+ cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
+ cdef cl_event event = NULL
+ triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper
+ a_transpose = CLBlastTransposeYes if a_transp else CLBlastTransposeNo
+ diagonal = CLBlastDiagonalUnit if unit_diagonal else CLBlastDiagonalNonUnit
+
+ cdef CLBlastStatusCode err
+ if dtype == np.dtype("float32"):
+ err = CLBlastStrmv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, &command_queue, &event)
+ elif dtype == np.dtype("float64"):
+ err = CLBlastDtrmv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, &command_queue, &event)
+ elif dtype == np.dtype("complex64"):
+ err = CLBlastCtrmv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, &command_queue, &event)
+ elif dtype == np.dtype("complex128"):
+ err = CLBlastZtrmv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, &command_queue, &event)
+ else:
+ raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
+ if err != CLBlastSuccess:
+ raise RuntimeError("PyCLBlast: 'CLBlastXtrmv' failed: %s" % get_status_message(err))
+ return cl.Event.from_int_ptr(<size_t>event)
+
+####################################################################################################
+# Triangular banded matrix-vector multiplication: STBMV/DTBMV/CTBMV/ZTBMV/HTBMV
+####################################################################################################
+
+cdef extern from "clblast_c.h":
+ CLBlastStatusCode CLBlastStbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const size_t k, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastDtbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const size_t k, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastCtbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const size_t k, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastZtbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const size_t k, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event)
+
+def tbmv(queue, n, k, a, x, a_ld, x_inc = 1, lower_triangle = False, a_transp = False, unit_diagonal = False, a_offset = 0, x_offset = 0):
+ dtype = check_dtype([a, x], ["float32", "float64", "complex64", "complex128"])
+ check_matrix(a, "a")
+ check_vector(x, "x")
+
+ cdef cl_mem a_buffer = <cl_mem><size_t>a.base_data.int_ptr
+ cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr
+
+ cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
+ cdef cl_event event = NULL
+ triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper
+ a_transpose = CLBlastTransposeYes if a_transp else CLBlastTransposeNo
+ diagonal = CLBlastDiagonalUnit if unit_diagonal else CLBlastDiagonalNonUnit
+
+ cdef CLBlastStatusCode err
+ if dtype == np.dtype("float32"):
+ err = CLBlastStbmv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n, k, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, &command_queue, &event)
+ elif dtype == np.dtype("float64"):
+ err = CLBlastDtbmv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n, k, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, &command_queue, &event)
+ elif dtype == np.dtype("complex64"):
+ err = CLBlastCtbmv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n, k, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, &command_queue, &event)
+ elif dtype == np.dtype("complex128"):
+ err = CLBlastZtbmv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n, k, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, &command_queue, &event)
+ else:
+ raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
+ if err != CLBlastSuccess:
+ raise RuntimeError("PyCLBlast: 'CLBlastXtbmv' failed: %s" % get_status_message(err))
+ return cl.Event.from_int_ptr(<size_t>event)
+
+####################################################################################################
+# Triangular packed matrix-vector multiplication: STPMV/DTPMV/CTPMV/ZTPMV/HTPMV
+####################################################################################################
+
+cdef extern from "clblast_c.h":
+ CLBlastStatusCode CLBlastStpmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const cl_mem ap_buffer, const size_t ap_offset, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastDtpmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const cl_mem ap_buffer, const size_t ap_offset, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastCtpmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const cl_mem ap_buffer, const size_t ap_offset, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastZtpmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const cl_mem ap_buffer, const size_t ap_offset, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event)
+
+def tpmv(queue, n, ap, x, ap_ld, x_inc = 1, lower_triangle = False, a_transp = False, unit_diagonal = False, ap_offset = 0, x_offset = 0):
+ dtype = check_dtype([ap, x], ["float32", "float64", "complex64", "complex128"])
+ check_matrix(ap, "ap")
+ check_vector(x, "x")
+
+ cdef cl_mem ap_buffer = <cl_mem><size_t>ap.base_data.int_ptr
+ cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr
+
+ cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
+ cdef cl_event event = NULL
+ triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper
+ a_transpose = CLBlastTransposeYes if a_transp else CLBlastTransposeNo
+ diagonal = CLBlastDiagonalUnit if unit_diagonal else CLBlastDiagonalNonUnit
+
+ cdef CLBlastStatusCode err
+ if dtype == np.dtype("float32"):
+ err = CLBlastStpmv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n, ap_buffer, ap_offset, x_buffer, x_offset, x_inc, &command_queue, &event)
+ elif dtype == np.dtype("float64"):
+ err = CLBlastDtpmv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n, ap_buffer, ap_offset, x_buffer, x_offset, x_inc, &command_queue, &event)
+ elif dtype == np.dtype("complex64"):
+ err = CLBlastCtpmv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n, ap_buffer, ap_offset, x_buffer, x_offset, x_inc, &command_queue, &event)
+ elif dtype == np.dtype("complex128"):
+ err = CLBlastZtpmv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n, ap_buffer, ap_offset, x_buffer, x_offset, x_inc, &command_queue, &event)
+ else:
+ raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
+ if err != CLBlastSuccess:
+ raise RuntimeError("PyCLBlast: 'CLBlastXtpmv' failed: %s" % get_status_message(err))
+ return cl.Event.from_int_ptr(<size_t>event)
+
+####################################################################################################
+# Solves a triangular system of equations: STRSV/DTRSV/CTRSV/ZTRSV
+####################################################################################################
+
+cdef extern from "clblast_c.h":
+ CLBlastStatusCode CLBlastStrsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastDtrsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastCtrsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastZtrsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event)
+
+def trsv(queue, n, a, x, a_ld, x_inc = 1, lower_triangle = False, a_transp = False, unit_diagonal = False, a_offset = 0, x_offset = 0):
+ dtype = check_dtype([a, x], ["float32", "float64", "complex64", "complex128"])
+ check_matrix(a, "a")
+ check_vector(x, "x")
+
+ cdef cl_mem a_buffer = <cl_mem><size_t>a.base_data.int_ptr
+ cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr
+
+ cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
+ cdef cl_event event = NULL
+ triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper
+ a_transpose = CLBlastTransposeYes if a_transp else CLBlastTransposeNo
+ diagonal = CLBlastDiagonalUnit if unit_diagonal else CLBlastDiagonalNonUnit
+
+ cdef CLBlastStatusCode err
+ if dtype == np.dtype("float32"):
+ err = CLBlastStrsv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, &command_queue, &event)
+ elif dtype == np.dtype("float64"):
+ err = CLBlastDtrsv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, &command_queue, &event)
+ elif dtype == np.dtype("complex64"):
+ err = CLBlastCtrsv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, &command_queue, &event)
+ elif dtype == np.dtype("complex128"):
+ err = CLBlastZtrsv(CLBlastLayoutRowMajor, triangle, a_transpose, diagonal, n, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, &command_queue, &event)
+ else:
+ raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
+ if err != CLBlastSuccess:
+ raise RuntimeError("PyCLBlast: 'CLBlastXtrsv' failed: %s" % get_status_message(err))
+ return cl.Event.from_int_ptr(<size_t>event)
+
+####################################################################################################
+# General rank-1 matrix update: SGER/DGER/HGER
+####################################################################################################
+
+cdef extern from "clblast_c.h":
+ CLBlastStatusCode CLBlastSger(const CLBlastLayout layout, const size_t m, const size_t n, const float alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastDger(const CLBlastLayout layout, const size_t m, const size_t n, const double alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld,cl_command_queue* queue, cl_event* event)
+
+def ger(queue, m, n, x, y, a, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, x_offset = 0, y_offset = 0, a_offset = 0):
+ dtype = check_dtype([x, y, a], ["float32", "float64"])
+ check_vector(x, "x")
+ check_vector(y, "y")
+ check_matrix(a, "a")
+
+ cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr
+ cdef cl_mem y_buffer = <cl_mem><size_t>y.base_data.int_ptr
+ cdef cl_mem a_buffer = <cl_mem><size_t>a.base_data.int_ptr
+
+ cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
+ cdef cl_event event = NULL
+
+ cdef CLBlastStatusCode err
+ if dtype == np.dtype("float32"):
+ err = CLBlastSger(CLBlastLayoutRowMajor, m, n, <cl_float>alpha, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, a_buffer, a_offset, a_ld, &command_queue, &event)
+ elif dtype == np.dtype("float64"):
+ err = CLBlastDger(CLBlastLayoutRowMajor, m, n, <cl_double>alpha, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, a_buffer, a_offset, a_ld, &command_queue, &event)
+ else:
+ raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
+ if err != CLBlastSuccess:
+ raise RuntimeError("PyCLBlast: 'CLBlastXger' failed: %s" % get_status_message(err))
+ return cl.Event.from_int_ptr(<size_t>event)
+
+####################################################################################################
+# General rank-1 complex matrix update: CGERU/ZGERU
+####################################################################################################
+
+cdef extern from "clblast_c.h":
+ CLBlastStatusCode CLBlastCgeru(const CLBlastLayout layout, const size_t m, const size_t n, const cl_float2 alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastZgeru(const CLBlastLayout layout, const size_t m, const size_t n, const cl_double2 alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld,cl_command_queue* queue, cl_event* event)
+
+def geru(queue, m, n, x, y, a, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, x_offset = 0, y_offset = 0, a_offset = 0):
+ dtype = check_dtype([x, y, a], ["complex64", "complex128"])
+ check_vector(x, "x")
+ check_vector(y, "y")
+ check_matrix(a, "a")
+
+ cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr
+ cdef cl_mem y_buffer = <cl_mem><size_t>y.base_data.int_ptr
+ cdef cl_mem a_buffer = <cl_mem><size_t>a.base_data.int_ptr
+
+ cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
+ cdef cl_event event = NULL
+
+ cdef CLBlastStatusCode err
+ if dtype == np.dtype("complex64"):
+ err = CLBlastCgeru(CLBlastLayoutRowMajor, m, n, <cl_float2>cl_float2(x=alpha.real,y=alpha.imag), x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, a_buffer, a_offset, a_ld, &command_queue, &event)
+ elif dtype == np.dtype("complex128"):
+ err = CLBlastZgeru(CLBlastLayoutRowMajor, m, n, <cl_double2>cl_double2(x=alpha.real,y=alpha.imag), x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, a_buffer, a_offset, a_ld, &command_queue, &event)
+ else:
+ raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
+ if err != CLBlastSuccess:
+ raise RuntimeError("PyCLBlast: 'CLBlastXgeru' failed: %s" % get_status_message(err))
+ return cl.Event.from_int_ptr(<size_t>event)
+
+####################################################################################################
+# General rank-1 complex conjugated matrix update: CGERC/ZGERC
+####################################################################################################
+
+cdef extern from "clblast_c.h":
+ CLBlastStatusCode CLBlastCgerc(const CLBlastLayout layout, const size_t m, const size_t n, const cl_float2 alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastZgerc(const CLBlastLayout layout, const size_t m, const size_t n, const cl_double2 alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld,cl_command_queue* queue, cl_event* event)
+
+def gerc(queue, m, n, x, y, a, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, x_offset = 0, y_offset = 0, a_offset = 0):
+ dtype = check_dtype([x, y, a], ["complex64", "complex128"])
+ check_vector(x, "x")
+ check_vector(y, "y")
+ check_matrix(a, "a")
+
+ cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr
+ cdef cl_mem y_buffer = <cl_mem><size_t>y.base_data.int_ptr
+ cdef cl_mem a_buffer = <cl_mem><size_t>a.base_data.int_ptr
+
+ cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
+ cdef cl_event event = NULL
+
+ cdef CLBlastStatusCode err
+ if dtype == np.dtype("complex64"):
+ err = CLBlastCgerc(CLBlastLayoutRowMajor, m, n, <cl_float2>cl_float2(x=alpha.real,y=alpha.imag), x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, a_buffer, a_offset, a_ld, &command_queue, &event)
+ elif dtype == np.dtype("complex128"):
+ err = CLBlastZgerc(CLBlastLayoutRowMajor, m, n, <cl_double2>cl_double2(x=alpha.real,y=alpha.imag), x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, a_buffer, a_offset, a_ld, &command_queue, &event)
+ else:
+ raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
+ if err != CLBlastSuccess:
+ raise RuntimeError("PyCLBlast: 'CLBlastXgerc' failed: %s" % get_status_message(err))
+ return cl.Event.from_int_ptr(<size_t>event)
+
+####################################################################################################
+# Hermitian rank-1 matrix update: CHER/ZHER
+####################################################################################################
+
+cdef extern from "clblast_c.h":
+ CLBlastStatusCode CLBlastCher(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const float alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastZher(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const double alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld,cl_command_queue* queue, cl_event* event)
+
+def her(queue, n, x, a, a_ld, x_inc = 1, alpha = 1.0, lower_triangle = False, x_offset = 0, a_offset = 0):
+ dtype = check_dtype([x, a], ["complex64", "complex128"])
+ check_vector(x, "x")
+ check_matrix(a, "a")
+
+ cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr
+ cdef cl_mem a_buffer = <cl_mem><size_t>a.base_data.int_ptr
+
+ cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
+ cdef cl_event event = NULL
+ triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper
+
+ cdef CLBlastStatusCode err
+ if dtype == np.dtype("complex64"):
+ err = CLBlastCher(CLBlastLayoutRowMajor, triangle, n, <cl_float>alpha, x_buffer, x_offset, x_inc, a_buffer, a_offset, a_ld, &command_queue, &event)
+ elif dtype == np.dtype("complex128"):
+ err = CLBlastZher(CLBlastLayoutRowMajor, triangle, n, <cl_double>alpha, x_buffer, x_offset, x_inc, a_buffer, a_offset, a_ld, &command_queue, &event)
+ else:
+ raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
+ if err != CLBlastSuccess:
+ raise RuntimeError("PyCLBlast: 'CLBlastXher' failed: %s" % get_status_message(err))
+ return cl.Event.from_int_ptr(<size_t>event)
+
+####################################################################################################
+# Hermitian packed rank-1 matrix update: CHPR/ZHPR
+####################################################################################################
+
+cdef extern from "clblast_c.h":
+ CLBlastStatusCode CLBlastChpr(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const float alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem ap_buffer, const size_t ap_offset,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastZhpr(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const double alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem ap_buffer, const size_t ap_offset,cl_command_queue* queue, cl_event* event)
+
+def hpr(queue, n, x, ap, ap_ld, x_inc = 1, alpha = 1.0, lower_triangle = False, x_offset = 0, ap_offset = 0):
+ dtype = check_dtype([x, ap], ["complex64", "complex128"])
+ check_vector(x, "x")
+ check_matrix(ap, "ap")
+
+ cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr
+ cdef cl_mem ap_buffer = <cl_mem><size_t>ap.base_data.int_ptr
+
+ cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
+ cdef cl_event event = NULL
+ triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper
+
+ cdef CLBlastStatusCode err
+ if dtype == np.dtype("complex64"):
+ err = CLBlastChpr(CLBlastLayoutRowMajor, triangle, n, <cl_float>alpha, x_buffer, x_offset, x_inc, ap_buffer, ap_offset, &command_queue, &event)
+ elif dtype == np.dtype("complex128"):
+ err = CLBlastZhpr(CLBlastLayoutRowMajor, triangle, n, <cl_double>alpha, x_buffer, x_offset, x_inc, ap_buffer, ap_offset, &command_queue, &event)
+ else:
+ raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
+ if err != CLBlastSuccess:
+ raise RuntimeError("PyCLBlast: 'CLBlastXhpr' failed: %s" % get_status_message(err))
+ return cl.Event.from_int_ptr(<size_t>event)
+
+####################################################################################################
+# Hermitian rank-2 matrix update: CHER2/ZHER2
+####################################################################################################
+
+cdef extern from "clblast_c.h":
+ CLBlastStatusCode CLBlastCher2(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const cl_float2 alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastZher2(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const cl_double2 alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld,cl_command_queue* queue, cl_event* event)
+
+def her2(queue, n, x, y, a, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, lower_triangle = False, x_offset = 0, y_offset = 0, a_offset = 0):
+ dtype = check_dtype([x, y, a], ["complex64", "complex128"])
+ check_vector(x, "x")
+ check_vector(y, "y")
+ check_matrix(a, "a")
+
+ cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr
+ cdef cl_mem y_buffer = <cl_mem><size_t>y.base_data.int_ptr
+ cdef cl_mem a_buffer = <cl_mem><size_t>a.base_data.int_ptr
+
+ cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
+ cdef cl_event event = NULL
+ triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper
+
+ cdef CLBlastStatusCode err
+ if dtype == np.dtype("complex64"):
+ err = CLBlastCher2(CLBlastLayoutRowMajor, triangle, n, <cl_float2>cl_float2(x=alpha.real,y=alpha.imag), x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, a_buffer, a_offset, a_ld, &command_queue, &event)
+ elif dtype == np.dtype("complex128"):
+ err = CLBlastZher2(CLBlastLayoutRowMajor, triangle, n, <cl_double2>cl_double2(x=alpha.real,y=alpha.imag), x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, a_buffer, a_offset, a_ld, &command_queue, &event)
+ else:
+ raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
+ if err != CLBlastSuccess:
+ raise RuntimeError("PyCLBlast: 'CLBlastXher2' failed: %s" % get_status_message(err))
+ return cl.Event.from_int_ptr(<size_t>event)
+
+####################################################################################################
+# Hermitian packed rank-2 matrix update: CHPR2/ZHPR2
+####################################################################################################
+
+cdef extern from "clblast_c.h":
+ CLBlastStatusCode CLBlastChpr2(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const cl_float2 alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem ap_buffer, const size_t ap_offset,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastZhpr2(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const cl_double2 alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem ap_buffer, const size_t ap_offset,cl_command_queue* queue, cl_event* event)
+
+def hpr2(queue, n, x, y, ap, ap_ld, x_inc = 1, y_inc = 1, alpha = 1.0, lower_triangle = False, x_offset = 0, y_offset = 0, ap_offset = 0):
+ dtype = check_dtype([x, y, ap], ["complex64", "complex128"])
+ check_vector(x, "x")
+ check_vector(y, "y")
+ check_matrix(ap, "ap")
+
+ cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr
+ cdef cl_mem y_buffer = <cl_mem><size_t>y.base_data.int_ptr
+ cdef cl_mem ap_buffer = <cl_mem><size_t>ap.base_data.int_ptr
+
+ cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
+ cdef cl_event event = NULL
+ triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper
+
+ cdef CLBlastStatusCode err
+ if dtype == np.dtype("complex64"):
+ err = CLBlastChpr2(CLBlastLayoutRowMajor, triangle, n, <cl_float2>cl_float2(x=alpha.real,y=alpha.imag), x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, ap_buffer, ap_offset, &command_queue, &event)
+ elif dtype == np.dtype("complex128"):
+ err = CLBlastZhpr2(CLBlastLayoutRowMajor, triangle, n, <cl_double2>cl_double2(x=alpha.real,y=alpha.imag), x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, ap_buffer, ap_offset, &command_queue, &event)
+ else:
+ raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
+ if err != CLBlastSuccess:
+ raise RuntimeError("PyCLBlast: 'CLBlastXhpr2' failed: %s" % get_status_message(err))
+ return cl.Event.from_int_ptr(<size_t>event)
+
+####################################################################################################
+# Symmetric rank-1 matrix update: SSYR/DSYR/HSYR
+####################################################################################################
+
+cdef extern from "clblast_c.h":
+ CLBlastStatusCode CLBlastSsyr(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const float alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastDsyr(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const double alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld,cl_command_queue* queue, cl_event* event)
+
+def syr(queue, n, x, a, a_ld, x_inc = 1, alpha = 1.0, lower_triangle = False, x_offset = 0, a_offset = 0):
+ dtype = check_dtype([x, a], ["float32", "float64"])
+ check_vector(x, "x")
+ check_matrix(a, "a")
+
+ cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr
+ cdef cl_mem a_buffer = <cl_mem><size_t>a.base_data.int_ptr
+
+ cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
+ cdef cl_event event = NULL
+ triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper
+
+ cdef CLBlastStatusCode err
+ if dtype == np.dtype("float32"):
+ err = CLBlastSsyr(CLBlastLayoutRowMajor, triangle, n, <cl_float>alpha, x_buffer, x_offset, x_inc, a_buffer, a_offset, a_ld, &command_queue, &event)
+ elif dtype == np.dtype("float64"):
+ err = CLBlastDsyr(CLBlastLayoutRowMajor, triangle, n, <cl_double>alpha, x_buffer, x_offset, x_inc, a_buffer, a_offset, a_ld, &command_queue, &event)
+ else:
+ raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
+ if err != CLBlastSuccess:
+ raise RuntimeError("PyCLBlast: 'CLBlastXsyr' failed: %s" % get_status_message(err))
+ return cl.Event.from_int_ptr(<size_t>event)
+
+####################################################################################################
+# Symmetric packed rank-1 matrix update: SSPR/DSPR/HSPR
+####################################################################################################
+
+cdef extern from "clblast_c.h":
+ CLBlastStatusCode CLBlastSspr(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const float alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem ap_buffer, const size_t ap_offset,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastDspr(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const double alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem ap_buffer, const size_t ap_offset,cl_command_queue* queue, cl_event* event)
+
+def spr(queue, n, x, ap, ap_ld, x_inc = 1, alpha = 1.0, lower_triangle = False, x_offset = 0, ap_offset = 0):
+ dtype = check_dtype([x, ap], ["float32", "float64"])
+ check_vector(x, "x")
+ check_matrix(ap, "ap")
+
+ cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr
+ cdef cl_mem ap_buffer = <cl_mem><size_t>ap.base_data.int_ptr
+
+ cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
+ cdef cl_event event = NULL
+ triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper
+
+ cdef CLBlastStatusCode err
+ if dtype == np.dtype("float32"):
+ err = CLBlastSspr(CLBlastLayoutRowMajor, triangle, n, <cl_float>alpha, x_buffer, x_offset, x_inc, ap_buffer, ap_offset, &command_queue, &event)
+ elif dtype == np.dtype("float64"):
+ err = CLBlastDspr(CLBlastLayoutRowMajor, triangle, n, <cl_double>alpha, x_buffer, x_offset, x_inc, ap_buffer, ap_offset, &command_queue, &event)
+ else:
+ raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
+ if err != CLBlastSuccess:
+ raise RuntimeError("PyCLBlast: 'CLBlastXspr' failed: %s" % get_status_message(err))
+ return cl.Event.from_int_ptr(<size_t>event)
+
+####################################################################################################
+# Symmetric rank-2 matrix update: SSYR2/DSYR2/HSYR2
+####################################################################################################
+
+cdef extern from "clblast_c.h":
+ CLBlastStatusCode CLBlastSsyr2(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const float alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastDsyr2(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const double alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld,cl_command_queue* queue, cl_event* event)
+
+def syr2(queue, n, x, y, a, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, lower_triangle = False, x_offset = 0, y_offset = 0, a_offset = 0):
+ dtype = check_dtype([x, y, a], ["float32", "float64"])
+ check_vector(x, "x")
+ check_vector(y, "y")
+ check_matrix(a, "a")
+
+ cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr
+ cdef cl_mem y_buffer = <cl_mem><size_t>y.base_data.int_ptr
+ cdef cl_mem a_buffer = <cl_mem><size_t>a.base_data.int_ptr
+
+ cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
+ cdef cl_event event = NULL
+ triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper
+
+ cdef CLBlastStatusCode err
+ if dtype == np.dtype("float32"):
+ err = CLBlastSsyr2(CLBlastLayoutRowMajor, triangle, n, <cl_float>alpha, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, a_buffer, a_offset, a_ld, &command_queue, &event)
+ elif dtype == np.dtype("float64"):
+ err = CLBlastDsyr2(CLBlastLayoutRowMajor, triangle, n, <cl_double>alpha, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, a_buffer, a_offset, a_ld, &command_queue, &event)
+ else:
+ raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
+ if err != CLBlastSuccess:
+ raise RuntimeError("PyCLBlast: 'CLBlastXsyr2' failed: %s" % get_status_message(err))
+ return cl.Event.from_int_ptr(<size_t>event)
+
+####################################################################################################
+# Symmetric packed rank-2 matrix update: SSPR2/DSPR2/HSPR2
+####################################################################################################
+
+cdef extern from "clblast_c.h":
+ CLBlastStatusCode CLBlastSspr2(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const float alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem ap_buffer, const size_t ap_offset,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastDspr2(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const double alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem ap_buffer, const size_t ap_offset,cl_command_queue* queue, cl_event* event)
+
+def spr2(queue, n, x, y, ap, ap_ld, x_inc = 1, y_inc = 1, alpha = 1.0, lower_triangle = False, x_offset = 0, y_offset = 0, ap_offset = 0):
+ dtype = check_dtype([x, y, ap], ["float32", "float64"])
+ check_vector(x, "x")
+ check_vector(y, "y")
+ check_matrix(ap, "ap")
+
+ cdef cl_mem x_buffer = <cl_mem><size_t>x.base_data.int_ptr
+ cdef cl_mem y_buffer = <cl_mem><size_t>y.base_data.int_ptr
+ cdef cl_mem ap_buffer = <cl_mem><size_t>ap.base_data.int_ptr
+
+ cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
+ cdef cl_event event = NULL
+ triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper
+
+ cdef CLBlastStatusCode err
+ if dtype == np.dtype("float32"):
+ err = CLBlastSspr2(CLBlastLayoutRowMajor, triangle, n, <cl_float>alpha, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, ap_buffer, ap_offset, &command_queue, &event)
+ elif dtype == np.dtype("float64"):
+ err = CLBlastDspr2(CLBlastLayoutRowMajor, triangle, n, <cl_double>alpha, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, ap_buffer, ap_offset, &command_queue, &event)
+ else:
+ raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
+ if err != CLBlastSuccess:
+ raise RuntimeError("PyCLBlast: 'CLBlastXspr2' failed: %s" % get_status_message(err))
+ return cl.Event.from_int_ptr(<size_t>event)
+
+####################################################################################################
+# General matrix-matrix multiplication: SGEMM/DGEMM/CGEMM/ZGEMM/HGEMM
+####################################################################################################
+
+cdef extern from "clblast_c.h":
+ CLBlastStatusCode CLBlastSgemm(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, const size_t m, const size_t n, const size_t k, const float alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const float beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastDgemm(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, const size_t m, const size_t n, const size_t k, const double alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const double beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastCgemm(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, const size_t m, const size_t n, const size_t k, const cl_float2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const cl_float2 beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastZgemm(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, const size_t m, const size_t n, const size_t k, const cl_double2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const cl_double2 beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event)
+
+def gemm(queue, m, n, k, a, b, c, a_ld, b_ld, c_ld, alpha = 1.0, beta = 0.0, a_transp = False, b_transp = False, a_offset = 0, b_offset = 0, c_offset = 0):
+ dtype = check_dtype([a, b, c], ["float32", "float64", "complex64", "complex128"])
+ check_matrix(a, "a")
+ check_matrix(b, "b")
+ check_matrix(c, "c")
+
+ cdef cl_mem a_buffer = <cl_mem><size_t>a.base_data.int_ptr
+ cdef cl_mem b_buffer = <cl_mem><size_t>b.base_data.int_ptr
+ cdef cl_mem c_buffer = <cl_mem><size_t>c.base_data.int_ptr
+
+ cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
+ cdef cl_event event = NULL
+ a_transpose = CLBlastTransposeYes if a_transp else CLBlastTransposeNo
+ b_transpose = CLBlastTransposeYes if b_transp else CLBlastTransposeNo
+
+ cdef CLBlastStatusCode err
+ if dtype == np.dtype("float32"):
+ err = CLBlastSgemm(CLBlastLayoutRowMajor, a_transpose, b_transpose, m, n, k, <cl_float>alpha, a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, <cl_float>beta, c_buffer, c_offset, c_ld, &command_queue, &event)
+ elif dtype == np.dtype("float64"):
+ err = CLBlastDgemm(CLBlastLayoutRowMajor, a_transpose, b_transpose, m, n, k, <cl_double>alpha, a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, <cl_double>beta, c_buffer, c_offset, c_ld, &command_queue, &event)
+ elif dtype == np.dtype("complex64"):
+ err = CLBlastCgemm(CLBlastLayoutRowMajor, a_transpose, b_transpose, m, n, k, <cl_float2>cl_float2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, <cl_float2>cl_float2(x=beta.real,y=beta.imag), c_buffer, c_offset, c_ld, &command_queue, &event)
+ elif dtype == np.dtype("complex128"):
+ err = CLBlastZgemm(CLBlastLayoutRowMajor, a_transpose, b_transpose, m, n, k, <cl_double2>cl_double2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, <cl_double2>cl_double2(x=beta.real,y=beta.imag), c_buffer, c_offset, c_ld, &command_queue, &event)
+ else:
+ raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
+ if err != CLBlastSuccess:
+ raise RuntimeError("PyCLBlast: 'CLBlastXgemm' failed: %s" % get_status_message(err))
+ return cl.Event.from_int_ptr(<size_t>event)
+
+####################################################################################################
+# Symmetric matrix-matrix multiplication: SSYMM/DSYMM/CSYMM/ZSYMM/HSYMM
+####################################################################################################
+
+cdef extern from "clblast_c.h":
+ CLBlastStatusCode CLBlastSsymm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const size_t m, const size_t n, const float alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const float beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastDsymm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const size_t m, const size_t n, const double alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const double beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastCsymm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const size_t m, const size_t n, const cl_float2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const cl_float2 beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastZsymm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const size_t m, const size_t n, const cl_double2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const cl_double2 beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event)
+
+def symm(queue, m, n, a, b, c, a_ld, b_ld, c_ld, alpha = 1.0, beta = 0.0, right_side = False, lower_triangle = False, a_offset = 0, b_offset = 0, c_offset = 0):
+ dtype = check_dtype([a, b, c], ["float32", "float64", "complex64", "complex128"])
+ check_matrix(a, "a")
+ check_matrix(b, "b")
+ check_matrix(c, "c")
+
+ cdef cl_mem a_buffer = <cl_mem><size_t>a.base_data.int_ptr
+ cdef cl_mem b_buffer = <cl_mem><size_t>b.base_data.int_ptr
+ cdef cl_mem c_buffer = <cl_mem><size_t>c.base_data.int_ptr
+
+ cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
+ cdef cl_event event = NULL
+ side = CLBlastSideRight if right_side else CLBlastSideLeft
+ triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper
+
+ cdef CLBlastStatusCode err
+ if dtype == np.dtype("float32"):
+ err = CLBlastSsymm(CLBlastLayoutRowMajor, side, triangle, m, n, <cl_float>alpha, a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, <cl_float>beta, c_buffer, c_offset, c_ld, &command_queue, &event)
+ elif dtype == np.dtype("float64"):
+ err = CLBlastDsymm(CLBlastLayoutRowMajor, side, triangle, m, n, <cl_double>alpha, a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, <cl_double>beta, c_buffer, c_offset, c_ld, &command_queue, &event)
+ elif dtype == np.dtype("complex64"):
+ err = CLBlastCsymm(CLBlastLayoutRowMajor, side, triangle, m, n, <cl_float2>cl_float2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, <cl_float2>cl_float2(x=beta.real,y=beta.imag), c_buffer, c_offset, c_ld, &command_queue, &event)
+ elif dtype == np.dtype("complex128"):
+ err = CLBlastZsymm(CLBlastLayoutRowMajor, side, triangle, m, n, <cl_double2>cl_double2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, <cl_double2>cl_double2(x=beta.real,y=beta.imag), c_buffer, c_offset, c_ld, &command_queue, &event)
+ else:
+ raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
+ if err != CLBlastSuccess:
+ raise RuntimeError("PyCLBlast: 'CLBlastXsymm' failed: %s" % get_status_message(err))
+ return cl.Event.from_int_ptr(<size_t>event)
+
+####################################################################################################
+# Hermitian matrix-matrix multiplication: CHEMM/ZHEMM
+####################################################################################################
+
+cdef extern from "clblast_c.h":
+ CLBlastStatusCode CLBlastChemm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const size_t m, const size_t n, const cl_float2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const cl_float2 beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastZhemm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const size_t m, const size_t n, const cl_double2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const cl_double2 beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event)
+
+def hemm(queue, m, n, a, b, c, a_ld, b_ld, c_ld, alpha = 1.0, beta = 0.0, right_side = False, lower_triangle = False, a_offset = 0, b_offset = 0, c_offset = 0):
+ dtype = check_dtype([a, b, c], ["complex64", "complex128"])
+ check_matrix(a, "a")
+ check_matrix(b, "b")
+ check_matrix(c, "c")
+
+ cdef cl_mem a_buffer = <cl_mem><size_t>a.base_data.int_ptr
+ cdef cl_mem b_buffer = <cl_mem><size_t>b.base_data.int_ptr
+ cdef cl_mem c_buffer = <cl_mem><size_t>c.base_data.int_ptr
+
+ cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
+ cdef cl_event event = NULL
+ side = CLBlastSideRight if right_side else CLBlastSideLeft
+ triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper
+
+ cdef CLBlastStatusCode err
+ if dtype == np.dtype("complex64"):
+ err = CLBlastChemm(CLBlastLayoutRowMajor, side, triangle, m, n, <cl_float2>cl_float2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, <cl_float2>cl_float2(x=beta.real,y=beta.imag), c_buffer, c_offset, c_ld, &command_queue, &event)
+ elif dtype == np.dtype("complex128"):
+ err = CLBlastZhemm(CLBlastLayoutRowMajor, side, triangle, m, n, <cl_double2>cl_double2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, <cl_double2>cl_double2(x=beta.real,y=beta.imag), c_buffer, c_offset, c_ld, &command_queue, &event)
+ else:
+ raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
+ if err != CLBlastSuccess:
+ raise RuntimeError("PyCLBlast: 'CLBlastXhemm' failed: %s" % get_status_message(err))
+ return cl.Event.from_int_ptr(<size_t>event)
+
+####################################################################################################
+# Rank-K update of a symmetric matrix: SSYRK/DSYRK/CSYRK/ZSYRK/HSYRK
+####################################################################################################
+
+cdef extern from "clblast_c.h":
+ CLBlastStatusCode CLBlastSsyrk(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const size_t n, const size_t k, const float alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const float beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastDsyrk(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const size_t n, const size_t k, const double alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const double beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastCsyrk(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const size_t n, const size_t k, const cl_float2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_float2 beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastZsyrk(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const size_t n, const size_t k, const cl_double2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_double2 beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event)
+
+def syrk(queue, n, k, a, c, a_ld, c_ld, alpha = 1.0, beta = 0.0, lower_triangle = False, a_transp = False, a_offset = 0, c_offset = 0):
+ dtype = check_dtype([a, c], ["float32", "float64", "complex64", "complex128"])
+ check_matrix(a, "a")
+ check_matrix(c, "c")
+
+ cdef cl_mem a_buffer = <cl_mem><size_t>a.base_data.int_ptr
+ cdef cl_mem c_buffer = <cl_mem><size_t>c.base_data.int_ptr
+
+ cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
+ cdef cl_event event = NULL
+ triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper
+ a_transpose = CLBlastTransposeYes if a_transp else CLBlastTransposeNo
+
+ cdef CLBlastStatusCode err
+ if dtype == np.dtype("float32"):
+ err = CLBlastSsyrk(CLBlastLayoutRowMajor, triangle, a_transpose, n, k, <cl_float>alpha, a_buffer, a_offset, a_ld, <cl_float>beta, c_buffer, c_offset, c_ld, &command_queue, &event)
+ elif dtype == np.dtype("float64"):
+ err = CLBlastDsyrk(CLBlastLayoutRowMajor, triangle, a_transpose, n, k, <cl_double>alpha, a_buffer, a_offset, a_ld, <cl_double>beta, c_buffer, c_offset, c_ld, &command_queue, &event)
+ elif dtype == np.dtype("complex64"):
+ err = CLBlastCsyrk(CLBlastLayoutRowMajor, triangle, a_transpose, n, k, <cl_float2>cl_float2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, <cl_float2>cl_float2(x=beta.real,y=beta.imag), c_buffer, c_offset, c_ld, &command_queue, &event)
+ elif dtype == np.dtype("complex128"):
+ err = CLBlastZsyrk(CLBlastLayoutRowMajor, triangle, a_transpose, n, k, <cl_double2>cl_double2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, <cl_double2>cl_double2(x=beta.real,y=beta.imag), c_buffer, c_offset, c_ld, &command_queue, &event)
+ else:
+ raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
+ if err != CLBlastSuccess:
+ raise RuntimeError("PyCLBlast: 'CLBlastXsyrk' failed: %s" % get_status_message(err))
+ return cl.Event.from_int_ptr(<size_t>event)
+
+####################################################################################################
+# Rank-K update of a hermitian matrix: CHERK/ZHERK
+####################################################################################################
+
+cdef extern from "clblast_c.h":
+ CLBlastStatusCode CLBlastCherk(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const size_t n, const size_t k, const float alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const float beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastZherk(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const size_t n, const size_t k, const double alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const double beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event)
+
+def herk(queue, n, k, a, c, a_ld, c_ld, alpha = 1.0, beta = 0.0, lower_triangle = False, a_transp = False, a_offset = 0, c_offset = 0):
+ dtype = check_dtype([a, c], ["complex64", "complex128"])
+ check_matrix(a, "a")
+ check_matrix(c, "c")
+
+ cdef cl_mem a_buffer = <cl_mem><size_t>a.base_data.int_ptr
+ cdef cl_mem c_buffer = <cl_mem><size_t>c.base_data.int_ptr
+
+ cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
+ cdef cl_event event = NULL
+ triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper
+ a_transpose = CLBlastTransposeYes if a_transp else CLBlastTransposeNo
+
+ cdef CLBlastStatusCode err
+ if dtype == np.dtype("complex64"):
+ err = CLBlastCherk(CLBlastLayoutRowMajor, triangle, a_transpose, n, k, <cl_float>alpha, a_buffer, a_offset, a_ld, <cl_float>beta, c_buffer, c_offset, c_ld, &command_queue, &event)
+ elif dtype == np.dtype("complex128"):
+ err = CLBlastZherk(CLBlastLayoutRowMajor, triangle, a_transpose, n, k, <cl_double>alpha, a_buffer, a_offset, a_ld, <cl_double>beta, c_buffer, c_offset, c_ld, &command_queue, &event)
+ else:
+ raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
+ if err != CLBlastSuccess:
+ raise RuntimeError("PyCLBlast: 'CLBlastXherk' failed: %s" % get_status_message(err))
+ return cl.Event.from_int_ptr(<size_t>event)
+
+####################################################################################################
+# Rank-2K update of a symmetric matrix: SSYR2K/DSYR2K/CSYR2K/ZSYR2K/HSYR2K
+####################################################################################################
+
+cdef extern from "clblast_c.h":
+ CLBlastStatusCode CLBlastSsyr2k(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose ab_transpose, const size_t n, const size_t k, const float alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const float beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastDsyr2k(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose ab_transpose, const size_t n, const size_t k, const double alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const double beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastCsyr2k(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose ab_transpose, const size_t n, const size_t k, const cl_float2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const cl_float2 beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastZsyr2k(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose ab_transpose, const size_t n, const size_t k, const cl_double2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const cl_double2 beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event)
+
+def syr2k(queue, n, k, a, b, c, a_ld, b_ld, c_ld, alpha = 1.0, beta = 0.0, lower_triangle = False, ab_transp = False, a_offset = 0, b_offset = 0, c_offset = 0):
+ dtype = check_dtype([a, b, c], ["float32", "float64", "complex64", "complex128"])
+ check_matrix(a, "a")
+ check_matrix(b, "b")
+ check_matrix(c, "c")
+
+ cdef cl_mem a_buffer = <cl_mem><size_t>a.base_data.int_ptr
+ cdef cl_mem b_buffer = <cl_mem><size_t>b.base_data.int_ptr
+ cdef cl_mem c_buffer = <cl_mem><size_t>c.base_data.int_ptr
+
+ cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
+ cdef cl_event event = NULL
+ triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper
+ ab_transpose = CLBlastTransposeYes if ab_transp else CLBlastTransposeNo
+
+ cdef CLBlastStatusCode err
+ if dtype == np.dtype("float32"):
+ err = CLBlastSsyr2k(CLBlastLayoutRowMajor, triangle, ab_transpose, n, k, <cl_float>alpha, a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, <cl_float>beta, c_buffer, c_offset, c_ld, &command_queue, &event)
+ elif dtype == np.dtype("float64"):
+ err = CLBlastDsyr2k(CLBlastLayoutRowMajor, triangle, ab_transpose, n, k, <cl_double>alpha, a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, <cl_double>beta, c_buffer, c_offset, c_ld, &command_queue, &event)
+ elif dtype == np.dtype("complex64"):
+ err = CLBlastCsyr2k(CLBlastLayoutRowMajor, triangle, ab_transpose, n, k, <cl_float2>cl_float2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, <cl_float2>cl_float2(x=beta.real,y=beta.imag), c_buffer, c_offset, c_ld, &command_queue, &event)
+ elif dtype == np.dtype("complex128"):
+ err = CLBlastZsyr2k(CLBlastLayoutRowMajor, triangle, ab_transpose, n, k, <cl_double2>cl_double2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, <cl_double2>cl_double2(x=beta.real,y=beta.imag), c_buffer, c_offset, c_ld, &command_queue, &event)
+ else:
+ raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
+ if err != CLBlastSuccess:
+ raise RuntimeError("PyCLBlast: 'CLBlastXsyr2k' failed: %s" % get_status_message(err))
+ return cl.Event.from_int_ptr(<size_t>event)
+
+####################################################################################################
+# Rank-2K update of a hermitian matrix: CHER2K/ZHER2K
+####################################################################################################
+
+cdef extern from "clblast_c.h":
+ CLBlastStatusCode CLBlastCher2k(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose ab_transpose, const size_t n, const size_t k, const cl_float2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const float beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastZher2k(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose ab_transpose, const size_t n, const size_t k, const cl_double2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const double beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event)
+
+def her2k(queue, n, k, a, b, c, a_ld, b_ld, c_ld, alpha = 1.0, beta = 0.0, lower_triangle = False, ab_transp = False, a_offset = 0, b_offset = 0, c_offset = 0):
+ dtype = check_dtype([a, b, c], ["complex64", "complex128"])
+ check_matrix(a, "a")
+ check_matrix(b, "b")
+ check_matrix(c, "c")
+
+ cdef cl_mem a_buffer = <cl_mem><size_t>a.base_data.int_ptr
+ cdef cl_mem b_buffer = <cl_mem><size_t>b.base_data.int_ptr
+ cdef cl_mem c_buffer = <cl_mem><size_t>c.base_data.int_ptr
+
+ cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
+ cdef cl_event event = NULL
+ triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper
+ ab_transpose = CLBlastTransposeYes if ab_transp else CLBlastTransposeNo
+
+ cdef CLBlastStatusCode err
+ if dtype == np.dtype("complex64"):
+ err = CLBlastCher2k(CLBlastLayoutRowMajor, triangle, ab_transpose, n, k, <cl_float2>cl_float2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, <cl_float>beta, c_buffer, c_offset, c_ld, &command_queue, &event)
+ elif dtype == np.dtype("complex128"):
+ err = CLBlastZher2k(CLBlastLayoutRowMajor, triangle, ab_transpose, n, k, <cl_double2>cl_double2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, <cl_double>beta, c_buffer, c_offset, c_ld, &command_queue, &event)
+ else:
+ raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
+ if err != CLBlastSuccess:
+ raise RuntimeError("PyCLBlast: 'CLBlastXher2k' failed: %s" % get_status_message(err))
+ return cl.Event.from_int_ptr(<size_t>event)
+
+####################################################################################################
+# Triangular matrix-matrix multiplication: STRMM/DTRMM/CTRMM/ZTRMM/HTRMM
+####################################################################################################
+
+cdef extern from "clblast_c.h":
+ CLBlastStatusCode CLBlastStrmm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t m, const size_t n, const float alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem b_buffer, const size_t b_offset, const size_t b_ld,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastDtrmm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t m, const size_t n, const double alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem b_buffer, const size_t b_offset, const size_t b_ld,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastCtrmm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t m, const size_t n, const cl_float2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem b_buffer, const size_t b_offset, const size_t b_ld,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastZtrmm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t m, const size_t n, const cl_double2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem b_buffer, const size_t b_offset, const size_t b_ld,cl_command_queue* queue, cl_event* event)
+
+def trmm(queue, m, n, a, b, a_ld, b_ld, alpha = 1.0, right_side = False, lower_triangle = False, a_transp = False, unit_diagonal = False, a_offset = 0, b_offset = 0):
+ dtype = check_dtype([a, b], ["float32", "float64", "complex64", "complex128"])
+ check_matrix(a, "a")
+ check_matrix(b, "b")
+
+ cdef cl_mem a_buffer = <cl_mem><size_t>a.base_data.int_ptr
+ cdef cl_mem b_buffer = <cl_mem><size_t>b.base_data.int_ptr
+
+ cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
+ cdef cl_event event = NULL
+ side = CLBlastSideRight if right_side else CLBlastSideLeft
+ triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper
+ a_transpose = CLBlastTransposeYes if a_transp else CLBlastTransposeNo
+ diagonal = CLBlastDiagonalUnit if unit_diagonal else CLBlastDiagonalNonUnit
+
+ cdef CLBlastStatusCode err
+ if dtype == np.dtype("float32"):
+ err = CLBlastStrmm(CLBlastLayoutRowMajor, side, triangle, a_transpose, diagonal, m, n, <cl_float>alpha, a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, &command_queue, &event)
+ elif dtype == np.dtype("float64"):
+ err = CLBlastDtrmm(CLBlastLayoutRowMajor, side, triangle, a_transpose, diagonal, m, n, <cl_double>alpha, a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, &command_queue, &event)
+ elif dtype == np.dtype("complex64"):
+ err = CLBlastCtrmm(CLBlastLayoutRowMajor, side, triangle, a_transpose, diagonal, m, n, <cl_float2>cl_float2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, &command_queue, &event)
+ elif dtype == np.dtype("complex128"):
+ err = CLBlastZtrmm(CLBlastLayoutRowMajor, side, triangle, a_transpose, diagonal, m, n, <cl_double2>cl_double2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, &command_queue, &event)
+ else:
+ raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
+ if err != CLBlastSuccess:
+ raise RuntimeError("PyCLBlast: 'CLBlastXtrmm' failed: %s" % get_status_message(err))
+ return cl.Event.from_int_ptr(<size_t>event)
+
+####################################################################################################
+# Solves a triangular system of equations: STRSM/DTRSM/CTRSM/ZTRSM
+####################################################################################################
+
+cdef extern from "clblast_c.h":
+ CLBlastStatusCode CLBlastStrsm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t m, const size_t n, const float alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem b_buffer, const size_t b_offset, const size_t b_ld,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastDtrsm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t m, const size_t n, const double alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem b_buffer, const size_t b_offset, const size_t b_ld,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastCtrsm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t m, const size_t n, const cl_float2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem b_buffer, const size_t b_offset, const size_t b_ld,cl_command_queue* queue, cl_event* event)
+ CLBlastStatusCode CLBlastZtrsm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t m, const size_t n, const cl_double2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem b_buffer, const size_t b_offset, const size_t b_ld,cl_command_queue* queue, cl_event* event)
+
+def trsm(queue, m, n, a, b, a_ld, b_ld, alpha = 1.0, right_side = False, lower_triangle = False, a_transp = False, unit_diagonal = False, a_offset = 0, b_offset = 0):
+ dtype = check_dtype([a, b], ["float32", "float64", "complex64", "complex128"])
+ check_matrix(a, "a")
+ check_matrix(b, "b")
+
+ cdef cl_mem a_buffer = <cl_mem><size_t>a.base_data.int_ptr
+ cdef cl_mem b_buffer = <cl_mem><size_t>b.base_data.int_ptr
+
+ cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr
+ cdef cl_event event = NULL
+ side = CLBlastSideRight if right_side else CLBlastSideLeft
+ triangle = CLBlastTriangleLower if lower_triangle else CLBlastTriangleUpper
+ a_transpose = CLBlastTransposeYes if a_transp else CLBlastTransposeNo
+ diagonal = CLBlastDiagonalUnit if unit_diagonal else CLBlastDiagonalNonUnit
+
+ cdef CLBlastStatusCode err
+ if dtype == np.dtype("float32"):
+ err = CLBlastStrsm(CLBlastLayoutRowMajor, side, triangle, a_transpose, diagonal, m, n, <cl_float>alpha, a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, &command_queue, &event)
+ elif dtype == np.dtype("float64"):
+ err = CLBlastDtrsm(CLBlastLayoutRowMajor, side, triangle, a_transpose, diagonal, m, n, <cl_double>alpha, a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, &command_queue, &event)
+ elif dtype == np.dtype("complex64"):
+ err = CLBlastCtrsm(CLBlastLayoutRowMajor, side, triangle, a_transpose, diagonal, m, n, <cl_float2>cl_float2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, &command_queue, &event)
+ elif dtype == np.dtype("complex128"):
+ err = CLBlastZtrsm(CLBlastLayoutRowMajor, side, triangle, a_transpose, diagonal, m, n, <cl_double2>cl_double2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, &command_queue, &event)
+ else:
+ raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
+ if err != CLBlastSuccess:
+ raise RuntimeError("PyCLBlast: 'CLBlastXtrsm' failed: %s" % get_status_message(err))
+ return cl.Event.from_int_ptr(<size_t>event)
+
+####################################################################################################