summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorcnugteren <web@cedricnugteren.nl>2016-04-01 22:36:39 -0700
committercnugteren <web@cedricnugteren.nl>2016-04-01 22:36:39 -0700
commit5c83217cf256984573924e8f89c46f393a5fcfcd (patch)
treeb260ec46e10e12ff63d465212652523c3cfa7bc3
parenta2056f2216526989f423a74e4bcd016dac9424f4 (diff)
Added a wrapper for CBLAS libraries for performance/correctness testing
-rw-r--r--include/clblast.h2
-rw-r--r--include/clblast_c.h4
-rw-r--r--scripts/generator/datatype.py5
-rw-r--r--scripts/generator/generator.py53
-rw-r--r--scripts/generator/routine.py109
-rw-r--r--src/clblast.cc6
-rw-r--r--src/clblast_c.cc4
-rw-r--r--test/wrapper_cblas.h1667
-rw-r--r--test/wrapper_clblas.h6
9 files changed, 1825 insertions, 31 deletions
diff --git a/include/clblast.h b/include/clblast.h
index 5e5c5a46..431f2510 100644
--- a/include/clblast.h
+++ b/include/clblast.h
@@ -100,7 +100,7 @@ template <typename T>
StatusCode Rotmg(cl_mem sd1_buffer, const size_t sd1_offset,
cl_mem sd2_buffer, const size_t sd2_offset,
cl_mem sx1_buffer, const size_t sx1_offset,
- cl_mem sy1_buffer, const size_t sy1_offset,
+ const cl_mem sy1_buffer, const size_t sy1_offset,
cl_mem sparam_buffer, const size_t sparam_offset,
cl_command_queue* queue, cl_event* event = nullptr);
diff --git a/include/clblast_c.h b/include/clblast_c.h
index dcb3ae3a..f72cff3a 100644
--- a/include/clblast_c.h
+++ b/include/clblast_c.h
@@ -112,13 +112,13 @@ StatusCode PUBLIC_API CLBlastDrotg(cl_mem sa_buffer, const size_t sa_offset,
StatusCode PUBLIC_API CLBlastSrotmg(cl_mem sd1_buffer, const size_t sd1_offset,
cl_mem sd2_buffer, const size_t sd2_offset,
cl_mem sx1_buffer, const size_t sx1_offset,
- cl_mem sy1_buffer, const size_t sy1_offset,
+ const cl_mem sy1_buffer, const size_t sy1_offset,
cl_mem sparam_buffer, const size_t sparam_offset,
cl_command_queue* queue, cl_event* event);
StatusCode PUBLIC_API CLBlastDrotmg(cl_mem sd1_buffer, const size_t sd1_offset,
cl_mem sd2_buffer, const size_t sd2_offset,
cl_mem sx1_buffer, const size_t sx1_offset,
- cl_mem sy1_buffer, const size_t sy1_offset,
+ const cl_mem sy1_buffer, const size_t sy1_offset,
cl_mem sparam_buffer, const size_t sparam_offset,
cl_command_queue* queue, cl_event* event);
diff --git a/scripts/generator/datatype.py b/scripts/generator/datatype.py
index 9323bc4d..5a58ab53 100644
--- a/scripts/generator/datatype.py
+++ b/scripts/generator/datatype.py
@@ -58,5 +58,10 @@ class DataType():
return "<"+self.buffertype+","+self.beta_cpp+">, "+self.buffertype+", "+self.beta_cpp
return "<"+self.buffertype+">, "+self.buffertype+", "+self.beta_cpp
+ # Current scalar is complex
+ def IsComplex(self, scalar):
+ return ((scalar == "alpha" and self.alpha_cpp in [FLT2, DBL2]) or
+ (scalar == "beta" and self.beta_cpp in [FLT2, DBL2]))
+
# ==================================================================================================
diff --git a/scripts/generator/generator.py b/scripts/generator/generator.py
index 6e2b2ed2..36a9bf40 100644
--- a/scripts/generator/generator.py
+++ b/scripts/generator/generator.py
@@ -8,12 +8,13 @@
# Cedric Nugteren <www.cedricnugteren.nl>
#
# This script automatically generates the bodies of the following files, creating the full CLBlast
-# API interface and implementation (C, C++, and clBLAS wrapper):
+# API interface and implementation (C, C++, and reference BLAS wrappers):
# clblast.h
# clblast.cc
# clblast_c.h
# clblast_c.cc
# wrapper_clblas.h
+# wrapper_cblas.h
# It also generates the main functions for the correctness and performance tests as found in
# test/correctness/routines/levelX/xYYYY.cc
# test/performance/routines/levelX/xYYYY.cc
@@ -55,7 +56,7 @@ TU = DataType("TU", "typename T, typename U", "T,U", ["T", "U", "T", "U"], "T")
routines = [
[ # Level 1: vector-vector
Routine(False, "1", "rotg", T, [S,D], [], [], [], ["sa","sb","sc","ss"], [], "", "Generate givens plane rotation"),
- Routine(False, "1", "rotmg", T, [S,D], [], [], [], ["sd1","sd2","sx1","sy1","sparam"], [], "", "Generate modified givens plane rotation"),
+ Routine(False, "1", "rotmg", T, [S,D], [], [], ["sy1"], ["sd1","sd2","sx1","sparam"], [], "", "Generate modified givens plane rotation"),
Routine(False, "1", "rot", T, [S,D], ["n"], [], [], ["x","y"], ["cos","sin"], "", "Apply givens plane rotation"),
Routine(False, "1", "rotm", T, [S,D], ["n"], [], [], ["x","y","sparam"], [], "", "Apply modified givens plane rotation"),
Routine(True, "1", "swap", T, [S,D,C,Z], ["n"], [], [], ["x","y"], [], "", "Swap two vectors"),
@@ -220,11 +221,11 @@ def wrapper_clblas(routines):
for routine in routines:
result += "\n// Forwards the clBLAS calls for %s\n" % (routine.ShortNames())
if routine.NoScalars():
- result += routine.RoutineHeaderWrapper(routine.template, True, 21)+";\n"
+ result += routine.RoutineHeaderWrapperCL(routine.template, True, 21)+";\n"
for flavour in routine.flavours:
indent = " "*(17 + routine.Length())
- result += routine.RoutineHeaderWrapper(flavour, False, 21)+" {\n"
- arguments = routine.ArgumentsWrapper(flavour)
+ result += routine.RoutineHeaderWrapperCL(flavour, False, 21)+" {\n"
+ arguments = routine.ArgumentsWrapperCL(flavour)
if routine.scratch:
result += " auto queue = Queue(queues[0]);\n"
result += " auto context = queue.GetContext();\n"
@@ -236,6 +237,41 @@ def wrapper_clblas(routines):
result += "\n}\n"
return result
+# The wrapper to the reference CBLAS routines (for performance/correctness testing)
+def wrapper_cblas(routines):
+ result = ""
+ for routine in routines:
+ result += "\n// Forwards the Netlib BLAS calls for %s\n" % (routine.ShortNames())
+ for flavour in routine.flavours:
+ indent = " "*(10 + routine.Length())
+ result += routine.RoutineHeaderWrapperC(flavour, False, 12)+" {\n"
+ arguments = routine.ArgumentsWrapperC(flavour)
+
+ # Complex scalars: unpacked into a {real, imag} array so they can be passed to CBLAS by pointer
+ for scalar in routine.scalars:
+ if flavour.IsComplex(scalar):
+ result += " const auto "+scalar+"_array = std::vector<"+flavour.buffertype[:-1]+">{"+scalar+".real(), "+scalar+".imag()};\n"
+
+ # Special case for scalar outputs
+ assignment = ""
+ postfix = ""
+ extra_argument = ""
+ for output_buffer in routine.outputs:
+ if output_buffer in routine.ScalarBuffersFirst():
+ if flavour in [C,Z]:
+ postfix += "_sub"
+ indent += " "
+ extra_argument += ",\n"+indent+"reinterpret_cast<return_pointer_"+flavour.buffertype[:-1]+">(&"+output_buffer+"_buffer["+output_buffer+"_offset])"
+ else:
+ assignment = output_buffer+"_buffer["+output_buffer+"_offset] = "
+ indent += " "*len(assignment)
+
+ result += " "+assignment+"cblas_"+flavour.name.lower()+routine.name+postfix+"("
+ result += (",\n"+indent).join([a for a in arguments])
+ result += extra_argument+");"
+ result += "\n}\n"
+ return result
+
# ==================================================================================================
# Checks for the number of command-line arguments
@@ -251,9 +287,10 @@ files = [
path_clblast+"/include/clblast_c.h",
path_clblast+"/src/clblast_c.cc",
path_clblast+"/test/wrapper_clblas.h",
+ path_clblast+"/test/wrapper_cblas.h",
]
-header_lines = [84, 65, 93, 22, 22]
-footer_lines = [6, 3, 9, 2, 6]
+header_lines = [84, 65, 93, 22, 22, 31]
+footer_lines = [6, 3, 9, 2, 6, 6]
# Checks whether the command-line arguments are valid; exits otherwise
for f in files:
@@ -287,6 +324,8 @@ for i in xrange(0,len(files)):
body += clblast_c_cc(routines[level-1])
if i == 4:
body += wrapper_clblas(routines[level-1])
+ if i == 5:
+ body += wrapper_cblas(routines[level-1])
f.write("".join(file_header))
f.write(body)
f.write("".join(file_footer))
diff --git a/scripts/generator/routine.py b/scripts/generator/routine.py
index 02040583..fffa19f6 100644
--- a/scripts/generator/routine.py
+++ b/scripts/generator/routine.py
@@ -28,7 +28,7 @@ def OptionToCLBlast(x):
}[x]
# As above, but for clBLAS data-types
-def OptionToWrapper(x):
+def OptionToWrapperCL(x):
return {
'layout': "clblasOrder",
'a_transpose': "clblasTranspose",
@@ -39,6 +39,18 @@ def OptionToWrapper(x):
'diagonal': "clblasDiag",
}[x]
+# As above, but for CBLAS data-types
+def OptionToWrapperC(x):
+ return {
+ 'layout': "CBLAS_ORDER",
+ 'a_transpose': "CBLAS_TRANSPOSE",
+ 'b_transpose': "CBLAS_TRANSPOSE",
+ 'ab_transpose': "CBLAS_TRANSPOSE",
+ 'side': "CBLAS_SIDE",
+ 'triangle': "CBLAS_UPLO",
+ 'diagonal': "CBLAS_DIAG",
+ }[x]
+
# ==================================================================================================
# Class holding routine-specific information (e.g. name, which arguments, which precisions)
@@ -119,6 +131,16 @@ class Routine():
return [", ".join(a+b+c)]
return []
+ # As above but as vectors
+ def BufferDefVector(self, name, flavour):
+ prefix = "const " if (name in self.inputs) else ""
+ if (name in self.inputs) or (name in self.outputs):
+ a = [prefix+"std::vector<"+flavour.buffertype+">& "+name+"_buffer"]
+ b = ["const size_t "+name+"_offset"]
+ c = ["const size_t "+name+"_"+self.Postfix(name)] if (name not in self.BuffersWithoutLdInc()) else []
+ return [", ".join(a+b+c)]
+ return []
+
# As above but with Claduc buffers
def BufferCladuc(self, name):
if (name in self.inputs) or (name in self.outputs):
@@ -129,7 +151,7 @@ class Routine():
return []
# As above but with a static cast for clBLAS wrapper
- def BufferWrapper(self, name):
+ def BufferWrapperCL(self, name):
if (name in self.inputs) or (name in self.outputs):
a = [name+"_buffer"]
b = [name+"_offset"]
@@ -141,6 +163,24 @@ class Routine():
return [", ".join(a+b+c)]
return []
+ # As above but with a static cast for CBLAS wrapper
+ def BufferWrapperC(self, name, flavour):
+ prefix = "const " if (name in self.inputs) else ""
+ if (name in self.inputs) or (name in self.outputs):
+ if name == "sy1":
+ a = [name+"_buffer["+name+"_offset]"]
+ elif flavour.precision_name in ["C","Z"]:
+ a = ["reinterpret_cast<"+prefix+flavour.buffertype[:-1]+"*>(&"+name+"_buffer["+name+"_offset])"]
+ else:
+ a = ["&"+name+"_buffer["+name+"_offset]"]
+ c = []
+ if (name in ["x","y"]):
+ c = ["static_cast<int>("+name+"_"+self.Postfix(name)+")"]
+ elif (name in ["a","b","c"]):
+ c = [name+"_"+self.Postfix(name)]
+ return [", ".join(a+c)]
+ return []
+
# As above, but only data-types
def BufferType(self, name):
prefix = "const " if (name in self.inputs) else ""
@@ -179,6 +219,14 @@ class Routine():
return [name]
return []
+ # Retrieves the use of a scalar for CBLAS (alpha/beta)
+ def ScalarUseWrapperC(self, name, flavour):
+ if name in self.scalars:
+ if flavour.IsComplex(name):
+ return [name+"_array.data()"]
+ return [name]
+ return []
+
# Retrieves the definition of a scalar (alpha/beta)
def ScalarDef(self, name, flavour):
if name in self.scalars:
@@ -246,9 +294,16 @@ class Routine():
return []
# As above, but now using clBLAS data-types
- def OptionsDefWrapper(self):
+ def OptionsDefWrapperCL(self):
+ if self.options:
+ definitions = ["const "+OptionToWrapperCL(o)+" "+o for o in self.options]
+ return [", ".join(definitions)]
+ return []
+
+ # As above, but now using CBLAS data-types
+ def OptionsDefWrapperC(self):
if self.options:
- definitions = ["const "+OptionToWrapper(o)+" "+o for o in self.options]
+ definitions = ["const "+OptionToWrapperC(o)+" "+o for o in self.options]
return [", ".join(definitions)]
return []
@@ -284,16 +339,26 @@ class Routine():
list(chain(*[self.ScalarUse(s, flavour) for s in self.OtherScalars()])))
# As above, but for the clBLAS wrapper
- def ArgumentsWrapper(self, flavour):
+ def ArgumentsWrapperCL(self, flavour):
return (self.Options() + self.Sizes() +
- list(chain(*[self.BufferWrapper(b) for b in self.ScalarBuffersFirst()])) +
+ list(chain(*[self.BufferWrapperCL(b) for b in self.ScalarBuffersFirst()])) +
self.ScalarUseWrapper("alpha", flavour) +
- list(chain(*[self.BufferWrapper(b) for b in self.BuffersFirst()])) +
+ list(chain(*[self.BufferWrapperCL(b) for b in self.BuffersFirst()])) +
self.ScalarUseWrapper("beta", flavour) +
- list(chain(*[self.BufferWrapper(b) for b in self.BuffersSecond()])) +
- list(chain(*[self.BufferWrapper(b) for b in self.ScalarBuffersSecond()])) +
+ list(chain(*[self.BufferWrapperCL(b) for b in self.BuffersSecond()])) +
+ list(chain(*[self.BufferWrapperCL(b) for b in self.ScalarBuffersSecond()])) +
list(chain(*[self.ScalarUseWrapper(s, flavour) for s in self.OtherScalars()])))
+ # As above, but for the CBLAS wrapper
+ def ArgumentsWrapperC(self, flavour):
+ return (self.Options() + self.Sizes() +
+ self.ScalarUseWrapperC("alpha", flavour) +
+ list(chain(*[self.BufferWrapperC(b, flavour) for b in self.BuffersFirst()])) +
+ self.ScalarUseWrapperC("beta", flavour) +
+ list(chain(*[self.BufferWrapperC(b, flavour) for b in self.BuffersSecond()])) +
+ list(chain(*[self.BufferWrapperC(b, flavour) for b in self.ScalarBuffersSecond()])) +
+ list(chain(*[self.ScalarUseWrapperC(s, flavour) for s in self.OtherScalars()])))
+
# Retrieves a combination of all the argument definitions
def ArgumentsDef(self, flavour):
return (self.OptionsDef() + self.SizesDef() +
@@ -306,8 +371,8 @@ class Routine():
list(chain(*[self.ScalarDef(s, flavour) for s in self.OtherScalars()])))
# As above, but clBLAS wrapper plain datatypes
- def ArgumentsDefWrapper(self, flavour):
- return (self.OptionsDefWrapper() + self.SizesDef() +
+ def ArgumentsDefWrapperCL(self, flavour):
+ return (self.OptionsDefWrapperCL() + self.SizesDef() +
list(chain(*[self.BufferDef(b) for b in self.ScalarBuffersFirst()])) +
self.ScalarDefPlain("alpha", flavour) +
list(chain(*[self.BufferDef(b) for b in self.BuffersFirst()])) +
@@ -315,6 +380,17 @@ class Routine():
list(chain(*[self.BufferDef(b) for b in self.BuffersSecond()])) +
list(chain(*[self.BufferDef(b) for b in self.ScalarBuffersSecond()])) +
list(chain(*[self.ScalarDefPlain(s, flavour) for s in self.OtherScalars()])))
+
+ # As above, but CBLAS wrapper plain datatypes
+ def ArgumentsDefWrapperC(self, flavour):
+ return (self.OptionsDefWrapperC() + self.SizesDef() +
+ list(chain(*[self.BufferDefVector(b, flavour) for b in self.ScalarBuffersFirst()])) +
+ self.ScalarDefPlain("alpha", flavour) +
+ list(chain(*[self.BufferDefVector(b, flavour) for b in self.BuffersFirst()])) +
+ self.ScalarDefPlain("beta", flavour) +
+ list(chain(*[self.BufferDefVector(b, flavour) for b in self.BuffersSecond()])) +
+ list(chain(*[self.BufferDefVector(b, flavour) for b in self.ScalarBuffersSecond()])) +
+ list(chain(*[self.ScalarDefPlain(s, flavour) for s in self.OtherScalars()])))
# Retrieves a combination of all the argument types
def ArgumentsType(self, flavour):
@@ -356,7 +432,7 @@ class Routine():
return result
# As above, but now for the clBLAS wrapper
- def RoutineHeaderWrapper(self, flavour, def_only, spaces):
+ def RoutineHeaderWrapperCL(self, flavour, def_only, spaces):
template = "<"+flavour.template+">" if self.NoScalars() and not def_only else ""
indent = " "*(spaces + self.Length() + len(template))
result = ""
@@ -366,9 +442,16 @@ class Routine():
result += flavour.name
result += ">\n"
result += "clblasStatus clblasX"+self.name+template+"("
- result += (",\n"+indent).join([a for a in self.ArgumentsDefWrapper(flavour)])
+ result += (",\n"+indent).join([a for a in self.ArgumentsDefWrapperCL(flavour)])
result += ",\n"+indent+"cl_uint num_queues, cl_command_queue *queues"
result += ",\n"+indent+"cl_uint num_wait_events, const cl_event *wait_events, cl_event *events)"
return result
+ # As above, but now for the CBLAS wrapper
+ def RoutineHeaderWrapperC(self, flavour, def_only, spaces):
+ indent = " "*(spaces + self.Length())
+ result = "void cblasX"+self.name+"("
+ result += (",\n"+indent).join([a for a in self.ArgumentsDefWrapperC(flavour)])+")"
+ return result
+
# ==================================================================================================
diff --git a/src/clblast.cc b/src/clblast.cc
index fc50ffae..75893ee9 100644
--- a/src/clblast.cc
+++ b/src/clblast.cc
@@ -93,7 +93,7 @@ template <typename T>
StatusCode Rotmg(cl_mem, const size_t,
cl_mem, const size_t,
cl_mem, const size_t,
- cl_mem, const size_t,
+ const cl_mem, const size_t,
cl_mem, const size_t,
cl_command_queue*, cl_event*) {
return StatusCode::kNotImplemented;
@@ -101,13 +101,13 @@ StatusCode Rotmg(cl_mem, const size_t,
template StatusCode PUBLIC_API Rotmg<float>(cl_mem, const size_t,
cl_mem, const size_t,
cl_mem, const size_t,
- cl_mem, const size_t,
+ const cl_mem, const size_t,
cl_mem, const size_t,
cl_command_queue*, cl_event*);
template StatusCode PUBLIC_API Rotmg<double>(cl_mem, const size_t,
cl_mem, const size_t,
cl_mem, const size_t,
- cl_mem, const size_t,
+ const cl_mem, const size_t,
cl_mem, const size_t,
cl_command_queue*, cl_event*);
diff --git a/src/clblast_c.cc b/src/clblast_c.cc
index 6d10c686..23e97bd5 100644
--- a/src/clblast_c.cc
+++ b/src/clblast_c.cc
@@ -55,7 +55,7 @@ StatusCode CLBlastDrotg(cl_mem sa_buffer, const size_t sa_offset,
StatusCode CLBlastSrotmg(cl_mem sd1_buffer, const size_t sd1_offset,
cl_mem sd2_buffer, const size_t sd2_offset,
cl_mem sx1_buffer, const size_t sx1_offset,
- cl_mem sy1_buffer, const size_t sy1_offset,
+ const cl_mem sy1_buffer, const size_t sy1_offset,
cl_mem sparam_buffer, const size_t sparam_offset,
cl_command_queue* queue, cl_event* event) {
auto status = clblast::Rotmg<float>(sd1_buffer, sd1_offset,
@@ -69,7 +69,7 @@ StatusCode CLBlastSrotmg(cl_mem sd1_buffer, const size_t sd1_offset,
StatusCode CLBlastDrotmg(cl_mem sd1_buffer, const size_t sd1_offset,
cl_mem sd2_buffer, const size_t sd2_offset,
cl_mem sx1_buffer, const size_t sx1_offset,
- cl_mem sy1_buffer, const size_t sy1_offset,
+ const cl_mem sy1_buffer, const size_t sy1_offset,
cl_mem sparam_buffer, const size_t sparam_offset,
cl_command_queue* queue, cl_event* event) {
auto status = clblast::Rotmg<double>(sd1_buffer, sd1_offset,
diff --git a/test/wrapper_cblas.h b/test/wrapper_cblas.h
new file mode 100644
index 00000000..c690a45c
--- /dev/null
+++ b/test/wrapper_cblas.h
@@ -0,0 +1,1667 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements a wrapper around a CPU BLAS library, such that its routines can be called
+// in a similar way as the CLBlast routines: using alpha and beta to determine the precision.
+//
+// =================================================================================================
+
+#ifndef CLBLAST_TEST_WRAPPER_CBLAS_H_
+#define CLBLAST_TEST_WRAPPER_CBLAS_H_
+
+#include <cblas.h>
+
+#include "internal/utilities.h"
+
+namespace clblast {
+
+// OpenBLAS is not fully Netlib CBLAS compatible
+#ifdef OPENBLAS_VERSION
+ using return_pointer_float = openblas_complex_float*;
+ using return_pointer_double = openblas_complex_double*;
+#else
+ using return_pointer_float = void*;
+ using return_pointer_double = void*;
+#endif
+
+// =================================================================================================
+// BLAS level-1 (vector-vector) routines
+// =================================================================================================
+
+// Forwards the Netlib BLAS calls for SROTG/DROTG
+void cblasXrotg(std::vector<float>& sa_buffer, const size_t sa_offset,
+ std::vector<float>& sb_buffer, const size_t sb_offset,
+ std::vector<float>& sc_buffer, const size_t sc_offset,
+ std::vector<float>& ss_buffer, const size_t ss_offset) {
+ cblas_srotg(&sa_buffer[sa_offset],
+ &sb_buffer[sb_offset],
+ &sc_buffer[sc_offset],
+ &ss_buffer[ss_offset]);
+}
+void cblasXrotg(std::vector<double>& sa_buffer, const size_t sa_offset,
+ std::vector<double>& sb_buffer, const size_t sb_offset,
+ std::vector<double>& sc_buffer, const size_t sc_offset,
+ std::vector<double>& ss_buffer, const size_t ss_offset) {
+ cblas_drotg(&sa_buffer[sa_offset],
+ &sb_buffer[sb_offset],
+ &sc_buffer[sc_offset],
+ &ss_buffer[ss_offset]);
+}
+
+// Forwards the Netlib BLAS calls for SROTMG/DROTMG
+void cblasXrotmg(std::vector<float>& sd1_buffer, const size_t sd1_offset,
+ std::vector<float>& sd2_buffer, const size_t sd2_offset,
+ std::vector<float>& sx1_buffer, const size_t sx1_offset,
+ const std::vector<float>& sy1_buffer, const size_t sy1_offset,
+ std::vector<float>& sparam_buffer, const size_t sparam_offset) {
+ cblas_srotmg(&sd1_buffer[sd1_offset],
+ &sd2_buffer[sd2_offset],
+ &sx1_buffer[sx1_offset],
+ sy1_buffer[sy1_offset],
+ &sparam_buffer[sparam_offset]);
+}
+void cblasXrotmg(std::vector<double>& sd1_buffer, const size_t sd1_offset,
+ std::vector<double>& sd2_buffer, const size_t sd2_offset,
+ std::vector<double>& sx1_buffer, const size_t sx1_offset,
+ const std::vector<double>& sy1_buffer, const size_t sy1_offset,
+ std::vector<double>& sparam_buffer, const size_t sparam_offset) {
+ cblas_drotmg(&sd1_buffer[sd1_offset],
+ &sd2_buffer[sd2_offset],
+ &sx1_buffer[sx1_offset],
+ sy1_buffer[sy1_offset],
+ &sparam_buffer[sparam_offset]);
+}
+
+// Forwards the Netlib BLAS calls for SROT/DROT
+void cblasXrot(const size_t n,
+ std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc,
+ std::vector<float>& y_buffer, const size_t y_offset, const size_t y_inc,
+ const float cos,
+ const float sin) {
+ cblas_srot(n,
+ &x_buffer[x_offset], static_cast<int>(x_inc),
+ &y_buffer[y_offset], static_cast<int>(y_inc),
+ cos,
+ sin);
+}
+void cblasXrot(const size_t n,
+ std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc,
+ std::vector<double>& y_buffer, const size_t y_offset, const size_t y_inc,
+ const double cos,
+ const double sin) {
+ cblas_drot(n,
+ &x_buffer[x_offset], static_cast<int>(x_inc),
+ &y_buffer[y_offset], static_cast<int>(y_inc),
+ cos,
+ sin);
+}
+
+// Forwards the Netlib BLAS calls for SROTM/DROTM
+void cblasXrotm(const size_t n,
+ std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc,
+ std::vector<float>& y_buffer, const size_t y_offset, const size_t y_inc,
+ std::vector<float>& sparam_buffer, const size_t sparam_offset) {
+ cblas_srotm(n,
+ &x_buffer[x_offset], static_cast<int>(x_inc),
+ &y_buffer[y_offset], static_cast<int>(y_inc),
+ &sparam_buffer[sparam_offset]);
+}
+void cblasXrotm(const size_t n,
+ std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc,
+ std::vector<double>& y_buffer, const size_t y_offset, const size_t y_inc,
+ std::vector<double>& sparam_buffer, const size_t sparam_offset) {
+ cblas_drotm(n,
+ &x_buffer[x_offset], static_cast<int>(x_inc),
+ &y_buffer[y_offset], static_cast<int>(y_inc),
+ &sparam_buffer[sparam_offset]);
+}
+
+// Forwards the Netlib BLAS calls for SSWAP/DSWAP/CSWAP/ZSWAP
+void cblasXswap(const size_t n,
+ std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc,
+ std::vector<float>& y_buffer, const size_t y_offset, const size_t y_inc) {
+ cblas_sswap(n,
+ &x_buffer[x_offset], static_cast<int>(x_inc),
+ &y_buffer[y_offset], static_cast<int>(y_inc));
+}
+void cblasXswap(const size_t n,
+ std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc,
+ std::vector<double>& y_buffer, const size_t y_offset, const size_t y_inc) {
+ cblas_dswap(n,
+ &x_buffer[x_offset], static_cast<int>(x_inc),
+ &y_buffer[y_offset], static_cast<int>(y_inc));
+}
+void cblasXswap(const size_t n,
+ std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
+ std::vector<float2>& y_buffer, const size_t y_offset, const size_t y_inc) {
+ cblas_cswap(n,
+ reinterpret_cast<float*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+ reinterpret_cast<float*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
+}
+void cblasXswap(const size_t n,
+ std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
+ std::vector<double2>& y_buffer, const size_t y_offset, const size_t y_inc) {
+ cblas_zswap(n,
+ reinterpret_cast<double*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+ reinterpret_cast<double*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
+}
+
+// Forwards the Netlib BLAS calls for SSCAL/DSCAL/CSCAL/ZSCAL
+void cblasXscal(const size_t n,
+ const float alpha,
+ std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc) {
+ cblas_sscal(n,
+ alpha,
+ &x_buffer[x_offset], static_cast<int>(x_inc));
+}
+void cblasXscal(const size_t n,
+ const double alpha,
+ std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc) {
+ cblas_dscal(n,
+ alpha,
+ &x_buffer[x_offset], static_cast<int>(x_inc));
+}
+void cblasXscal(const size_t n,
+ const float2 alpha,
+ std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc) {
+ const auto alpha_array = std::vector<float>{alpha.real(), alpha.imag()};
+ cblas_cscal(n,
+ alpha_array.data(),
+ reinterpret_cast<float*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
+}
+void cblasXscal(const size_t n,
+ const double2 alpha,
+ std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc) {
+ const auto alpha_array = std::vector<double>{alpha.real(), alpha.imag()};
+ cblas_zscal(n,
+ alpha_array.data(),
+ reinterpret_cast<double*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
+}
+
+// Forwards the Netlib BLAS calls for SCOPY/DCOPY/CCOPY/ZCOPY
+void cblasXcopy(const size_t n,
+ const std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc,
+ std::vector<float>& y_buffer, const size_t y_offset, const size_t y_inc) {
+ cblas_scopy(n,
+ &x_buffer[x_offset], static_cast<int>(x_inc),
+ &y_buffer[y_offset], static_cast<int>(y_inc));
+}
+void cblasXcopy(const size_t n,
+ const std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc,
+ std::vector<double>& y_buffer, const size_t y_offset, const size_t y_inc) {
+ cblas_dcopy(n,
+ &x_buffer[x_offset], static_cast<int>(x_inc),
+ &y_buffer[y_offset], static_cast<int>(y_inc));
+}
+void cblasXcopy(const size_t n,
+ const std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
+ std::vector<float2>& y_buffer, const size_t y_offset, const size_t y_inc) {
+ cblas_ccopy(n,
+ reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+ reinterpret_cast<float*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
+}
+void cblasXcopy(const size_t n,
+ const std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
+ std::vector<double2>& y_buffer, const size_t y_offset, const size_t y_inc) {
+ cblas_zcopy(n,
+ reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+ reinterpret_cast<double*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
+}
+
+// Forwards the Netlib BLAS calls for SAXPY/DAXPY/CAXPY/ZAXPY
+void cblasXaxpy(const size_t n,
+ const float alpha,
+ const std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc,
+ std::vector<float>& y_buffer, const size_t y_offset, const size_t y_inc) {
+ cblas_saxpy(n,
+ alpha,
+ &x_buffer[x_offset], static_cast<int>(x_inc),
+ &y_buffer[y_offset], static_cast<int>(y_inc));
+}
+void cblasXaxpy(const size_t n,
+ const double alpha,
+ const std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc,
+ std::vector<double>& y_buffer, const size_t y_offset, const size_t y_inc) {
+ cblas_daxpy(n,
+ alpha,
+ &x_buffer[x_offset], static_cast<int>(x_inc),
+ &y_buffer[y_offset], static_cast<int>(y_inc));
+}
+void cblasXaxpy(const size_t n,
+ const float2 alpha,
+ const std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
+ std::vector<float2>& y_buffer, const size_t y_offset, const size_t y_inc) {
+ const auto alpha_array = std::vector<float>{alpha.real(), alpha.imag()};
+ cblas_caxpy(n,
+ alpha_array.data(),
+ reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+ reinterpret_cast<float*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
+}
+void cblasXaxpy(const size_t n,
+ const double2 alpha,
+ const std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
+ std::vector<double2>& y_buffer, const size_t y_offset, const size_t y_inc) {
+ const auto alpha_array = std::vector<double>{alpha.real(), alpha.imag()};
+ cblas_zaxpy(n,
+ alpha_array.data(),
+ reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+ reinterpret_cast<double*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
+}
+
+// Forwards the Netlib BLAS calls for SDOT/DDOT
+void cblasXdot(const size_t n,
+ std::vector<float>& dot_buffer, const size_t dot_offset,
+ const std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const std::vector<float>& y_buffer, const size_t y_offset, const size_t y_inc) {
+ dot_buffer[dot_offset] = cblas_sdot(n,
+ &x_buffer[x_offset], static_cast<int>(x_inc),
+ &y_buffer[y_offset], static_cast<int>(y_inc));
+}
+void cblasXdot(const size_t n,
+ std::vector<double>& dot_buffer, const size_t dot_offset,
+ const std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const std::vector<double>& y_buffer, const size_t y_offset, const size_t y_inc) {
+ dot_buffer[dot_offset] = cblas_ddot(n,
+ &x_buffer[x_offset], static_cast<int>(x_inc),
+ &y_buffer[y_offset], static_cast<int>(y_inc));
+}
+
+// Forwards the Netlib BLAS calls for CDOTU/ZDOTU
+void cblasXdotu(const size_t n,
+ std::vector<float2>& dot_buffer, const size_t dot_offset,
+ const std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const std::vector<float2>& y_buffer, const size_t y_offset, const size_t y_inc) {
+ cblas_cdotu_sub(n,
+ reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+ reinterpret_cast<const float*>(&y_buffer[y_offset]), static_cast<int>(y_inc),
+ reinterpret_cast<return_pointer_float>(&dot_buffer[dot_offset]));
+}
+void cblasXdotu(const size_t n,
+ std::vector<double2>& dot_buffer, const size_t dot_offset,
+ const std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const std::vector<double2>& y_buffer, const size_t y_offset, const size_t y_inc) {
+ cblas_zdotu_sub(n,
+ reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+ reinterpret_cast<const double*>(&y_buffer[y_offset]), static_cast<int>(y_inc),
+ reinterpret_cast<return_pointer_double>(&dot_buffer[dot_offset]));
+}
+
+// Forwards the Netlib BLAS calls for CDOTC/ZDOTC
+void cblasXdotc(const size_t n,
+ std::vector<float2>& dot_buffer, const size_t dot_offset,
+ const std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const std::vector<float2>& y_buffer, const size_t y_offset, const size_t y_inc) {
+ cblas_cdotc_sub(n,
+ reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+ reinterpret_cast<const float*>(&y_buffer[y_offset]), static_cast<int>(y_inc),
+ reinterpret_cast<return_pointer_float>(&dot_buffer[dot_offset]));
+}
+void cblasXdotc(const size_t n,
+ std::vector<double2>& dot_buffer, const size_t dot_offset,
+ const std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const std::vector<double2>& y_buffer, const size_t y_offset, const size_t y_inc) {
+ cblas_zdotc_sub(n,
+ reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+ reinterpret_cast<const double*>(&y_buffer[y_offset]), static_cast<int>(y_inc),
+ reinterpret_cast<return_pointer_double>(&dot_buffer[dot_offset]));
+}
+
+// Forwards the Netlib BLAS calls for SNRM2/DNRM2/ScNRM2/DzNRM2
+void cblasXnrm2(const size_t n,
+ std::vector<float>& nrm2_buffer, const size_t nrm2_offset,
+ const std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc) {
+ nrm2_buffer[nrm2_offset] = cblas_snrm2(n,
+ &x_buffer[x_offset], static_cast<int>(x_inc));
+}
+void cblasXnrm2(const size_t n,
+ std::vector<double>& nrm2_buffer, const size_t nrm2_offset,
+ const std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc) {
+ nrm2_buffer[nrm2_offset] = cblas_dnrm2(n,
+ &x_buffer[x_offset], static_cast<int>(x_inc));
+}
+// Single-precision complex overload: the scalar returned by cblas_scnrm2 is assigned to the
+// float2 element nrm2_buffer[nrm2_offset]. x is passed as a pointer to its underlying floats;
+// n is cast to int explicitly, like x_inc.
+void cblasXnrm2(const size_t n,
+                std::vector<float2>& nrm2_buffer, const size_t nrm2_offset,
+                const std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc) {
+  nrm2_buffer[nrm2_offset] = cblas_scnrm2(static_cast<int>(n),
+                                          reinterpret_cast<const float*>(&x_buffer[x_offset]),
+                                          static_cast<int>(x_inc));
+}
+// Double-precision complex overload: the scalar returned by cblas_dznrm2 is assigned to the
+// double2 element nrm2_buffer[nrm2_offset]. x is passed as a pointer to its underlying doubles;
+// n is cast to int explicitly, like x_inc.
+void cblasXnrm2(const size_t n,
+                std::vector<double2>& nrm2_buffer, const size_t nrm2_offset,
+                const std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc) {
+  nrm2_buffer[nrm2_offset] = cblas_dznrm2(static_cast<int>(n),
+                                          reinterpret_cast<const double*>(&x_buffer[x_offset]),
+                                          static_cast<int>(x_inc));
+}
+
+// =================================================================================================
+// BLAS level-2 (matrix-vector) routines
+// =================================================================================================
+
+// Forwards the Netlib BLAS calls for SGEMV/DGEMV/CGEMV/ZGEMV
+// Single-precision real overload: forwards to cblas_sgemv with a, x and y taken from the host
+// vectors at their offsets. Every size, leading dimension and increment is cast to int
+// explicitly, since the CBLAS prototypes take int rather than size_t.
+void cblasXgemv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose,
+                const size_t m, const size_t n,
+                const float alpha,
+                const std::vector<float>& a_buffer, const size_t a_offset, const size_t a_ld,
+                const std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc,
+                const float beta,
+                std::vector<float>& y_buffer, const size_t y_offset, const size_t y_inc) {
+  cblas_sgemv(layout, a_transpose,
+              static_cast<int>(m), static_cast<int>(n),
+              alpha,
+              &a_buffer[a_offset], static_cast<int>(a_ld),
+              &x_buffer[x_offset], static_cast<int>(x_inc),
+              beta,
+              &y_buffer[y_offset], static_cast<int>(y_inc));
+}
+// Double-precision real overload: forwards to cblas_dgemv with a, x and y taken from the host
+// vectors at their offsets. Every size, leading dimension and increment is cast to int
+// explicitly, since the CBLAS prototypes take int rather than size_t.
+void cblasXgemv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose,
+                const size_t m, const size_t n,
+                const double alpha,
+                const std::vector<double>& a_buffer, const size_t a_offset, const size_t a_ld,
+                const std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc,
+                const double beta,
+                std::vector<double>& y_buffer, const size_t y_offset, const size_t y_inc) {
+  cblas_dgemv(layout, a_transpose,
+              static_cast<int>(m), static_cast<int>(n),
+              alpha,
+              &a_buffer[a_offset], static_cast<int>(a_ld),
+              &x_buffer[x_offset], static_cast<int>(x_inc),
+              beta,
+              &y_buffer[y_offset], static_cast<int>(y_inc));
+}
+// Single-precision complex overload: forwards to cblas_cgemv. The buffers are passed as
+// pointers to their underlying floats, and all sizes/increments are cast to int explicitly.
+void cblasXgemv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose,
+                const size_t m, const size_t n,
+                const float2 alpha,
+                const std::vector<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
+                const std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
+                const float2 beta,
+                std::vector<float2>& y_buffer, const size_t y_offset, const size_t y_inc) {
+  // The scalars are handed over as pointers to {real, imag} pairs; two-element stack arrays
+  // are enough for that and avoid a heap allocation on every call.
+  const float alpha_array[2] = {alpha.real(), alpha.imag()};
+  const float beta_array[2] = {beta.real(), beta.imag()};
+  cblas_cgemv(layout, a_transpose,
+              static_cast<int>(m), static_cast<int>(n),
+              alpha_array,
+              reinterpret_cast<const float*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+              reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+              beta_array,
+              reinterpret_cast<float*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
+}
+// Double-precision complex overload: forwards to cblas_zgemv. The buffers are passed as
+// pointers to their underlying doubles, and all sizes/increments are cast to int explicitly.
+void cblasXgemv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose,
+                const size_t m, const size_t n,
+                const double2 alpha,
+                const std::vector<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
+                const std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
+                const double2 beta,
+                std::vector<double2>& y_buffer, const size_t y_offset, const size_t y_inc) {
+  // The scalars are handed over as pointers to {real, imag} pairs; two-element stack arrays
+  // are enough for that and avoid a heap allocation on every call.
+  const double alpha_array[2] = {alpha.real(), alpha.imag()};
+  const double beta_array[2] = {beta.real(), beta.imag()};
+  cblas_zgemv(layout, a_transpose,
+              static_cast<int>(m), static_cast<int>(n),
+              alpha_array,
+              reinterpret_cast<const double*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+              reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+              beta_array,
+              reinterpret_cast<double*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
+}
+
+// Forwards the Netlib BLAS calls for SGBMV/DGBMV/CGBMV/ZGBMV
+// Single-precision real overload: forwards to cblas_sgbmv with a, x and y taken from the host
+// vectors at their offsets. m, n, kl, ku, a_ld and both increments are cast to int explicitly,
+// since the CBLAS prototypes take int rather than size_t.
+void cblasXgbmv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose,
+                const size_t m, const size_t n, const size_t kl, const size_t ku,
+                const float alpha,
+                const std::vector<float>& a_buffer, const size_t a_offset, const size_t a_ld,
+                const std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc,
+                const float beta,
+                std::vector<float>& y_buffer, const size_t y_offset, const size_t y_inc) {
+  cblas_sgbmv(layout, a_transpose,
+              static_cast<int>(m), static_cast<int>(n),
+              static_cast<int>(kl), static_cast<int>(ku),
+              alpha,
+              &a_buffer[a_offset], static_cast<int>(a_ld),
+              &x_buffer[x_offset], static_cast<int>(x_inc),
+              beta,
+              &y_buffer[y_offset], static_cast<int>(y_inc));
+}
+// Double-precision real overload: forwards to cblas_dgbmv with a, x and y taken from the host
+// vectors at their offsets. m, n, kl, ku, a_ld and both increments are cast to int explicitly,
+// since the CBLAS prototypes take int rather than size_t.
+void cblasXgbmv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose,
+                const size_t m, const size_t n, const size_t kl, const size_t ku,
+                const double alpha,
+                const std::vector<double>& a_buffer, const size_t a_offset, const size_t a_ld,
+                const std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc,
+                const double beta,
+                std::vector<double>& y_buffer, const size_t y_offset, const size_t y_inc) {
+  cblas_dgbmv(layout, a_transpose,
+              static_cast<int>(m), static_cast<int>(n),
+              static_cast<int>(kl), static_cast<int>(ku),
+              alpha,
+              &a_buffer[a_offset], static_cast<int>(a_ld),
+              &x_buffer[x_offset], static_cast<int>(x_inc),
+              beta,
+              &y_buffer[y_offset], static_cast<int>(y_inc));
+}
+// Single-precision complex overload: forwards to cblas_cgbmv. The buffers are passed as
+// pointers to their underlying floats, and all sizes/increments are cast to int explicitly.
+void cblasXgbmv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose,
+                const size_t m, const size_t n, const size_t kl, const size_t ku,
+                const float2 alpha,
+                const std::vector<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
+                const std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
+                const float2 beta,
+                std::vector<float2>& y_buffer, const size_t y_offset, const size_t y_inc) {
+  // The scalars are handed over as pointers to {real, imag} pairs; two-element stack arrays
+  // are enough for that and avoid a heap allocation on every call.
+  const float alpha_array[2] = {alpha.real(), alpha.imag()};
+  const float beta_array[2] = {beta.real(), beta.imag()};
+  cblas_cgbmv(layout, a_transpose,
+              static_cast<int>(m), static_cast<int>(n),
+              static_cast<int>(kl), static_cast<int>(ku),
+              alpha_array,
+              reinterpret_cast<const float*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+              reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+              beta_array,
+              reinterpret_cast<float*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
+}
+// Double-precision complex overload: forwards to cblas_zgbmv. The buffers are passed as
+// pointers to their underlying doubles, and all sizes/increments are cast to int explicitly.
+void cblasXgbmv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose,
+                const size_t m, const size_t n, const size_t kl, const size_t ku,
+                const double2 alpha,
+                const std::vector<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
+                const std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
+                const double2 beta,
+                std::vector<double2>& y_buffer, const size_t y_offset, const size_t y_inc) {
+  // The scalars are handed over as pointers to {real, imag} pairs; two-element stack arrays
+  // are enough for that and avoid a heap allocation on every call.
+  const double alpha_array[2] = {alpha.real(), alpha.imag()};
+  const double beta_array[2] = {beta.real(), beta.imag()};
+  cblas_zgbmv(layout, a_transpose,
+              static_cast<int>(m), static_cast<int>(n),
+              static_cast<int>(kl), static_cast<int>(ku),
+              alpha_array,
+              reinterpret_cast<const double*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+              reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+              beta_array,
+              reinterpret_cast<double*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
+}
+
+// Forwards the Netlib BLAS calls for CHEMV/ZHEMV
+// Single-precision complex overload: forwards to cblas_chemv. The buffers are passed as
+// pointers to their underlying floats, and n, a_ld and the increments are cast to int explicitly.
+void cblasXhemv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
+                const size_t n,
+                const float2 alpha,
+                const std::vector<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
+                const std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
+                const float2 beta,
+                std::vector<float2>& y_buffer, const size_t y_offset, const size_t y_inc) {
+  // The scalars are handed over as pointers to {real, imag} pairs; two-element stack arrays
+  // are enough for that and avoid a heap allocation on every call.
+  const float alpha_array[2] = {alpha.real(), alpha.imag()};
+  const float beta_array[2] = {beta.real(), beta.imag()};
+  cblas_chemv(layout, triangle,
+              static_cast<int>(n),
+              alpha_array,
+              reinterpret_cast<const float*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+              reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+              beta_array,
+              reinterpret_cast<float*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
+}
+// Double-precision complex overload: forwards to cblas_zhemv. The buffers are passed as
+// pointers to their underlying doubles, and n, a_ld and the increments are cast to int explicitly.
+void cblasXhemv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
+                const size_t n,
+                const double2 alpha,
+                const std::vector<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
+                const std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
+                const double2 beta,
+                std::vector<double2>& y_buffer, const size_t y_offset, const size_t y_inc) {
+  // The scalars are handed over as pointers to {real, imag} pairs; two-element stack arrays
+  // are enough for that and avoid a heap allocation on every call.
+  const double alpha_array[2] = {alpha.real(), alpha.imag()};
+  const double beta_array[2] = {beta.real(), beta.imag()};
+  cblas_zhemv(layout, triangle,
+              static_cast<int>(n),
+              alpha_array,
+              reinterpret_cast<const double*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+              reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+              beta_array,
+              reinterpret_cast<double*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
+}
+
+// Forwards the Netlib BLAS calls for CHBMV/ZHBMV
+// Single-precision complex overload: forwards to cblas_chbmv. The buffers are passed as
+// pointers to their underlying floats, and n, k, a_ld and the increments are cast to int
+// explicitly.
+void cblasXhbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
+                const size_t n, const size_t k,
+                const float2 alpha,
+                const std::vector<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
+                const std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
+                const float2 beta,
+                std::vector<float2>& y_buffer, const size_t y_offset, const size_t y_inc) {
+  // The scalars are handed over as pointers to {real, imag} pairs; two-element stack arrays
+  // are enough for that and avoid a heap allocation on every call.
+  const float alpha_array[2] = {alpha.real(), alpha.imag()};
+  const float beta_array[2] = {beta.real(), beta.imag()};
+  cblas_chbmv(layout, triangle,
+              static_cast<int>(n), static_cast<int>(k),
+              alpha_array,
+              reinterpret_cast<const float*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+              reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+              beta_array,
+              reinterpret_cast<float*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
+}
+// Double-precision complex overload: forwards to cblas_zhbmv. The buffers are passed as
+// pointers to their underlying doubles, and n, k, a_ld and the increments are cast to int
+// explicitly.
+void cblasXhbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
+                const size_t n, const size_t k,
+                const double2 alpha,
+                const std::vector<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
+                const std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
+                const double2 beta,
+                std::vector<double2>& y_buffer, const size_t y_offset, const size_t y_inc) {
+  // The scalars are handed over as pointers to {real, imag} pairs; two-element stack arrays
+  // are enough for that and avoid a heap allocation on every call.
+  const double alpha_array[2] = {alpha.real(), alpha.imag()};
+  const double beta_array[2] = {beta.real(), beta.imag()};
+  cblas_zhbmv(layout, triangle,
+              static_cast<int>(n), static_cast<int>(k),
+              alpha_array,
+              reinterpret_cast<const double*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+              reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+              beta_array,
+              reinterpret_cast<double*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
+}
+
+// Forwards the Netlib BLAS calls for CHPMV/ZHPMV
+// Single-precision complex overload: forwards to cblas_chpmv. ap, x and y are passed as
+// pointers to their underlying floats, and n and the increments are cast to int explicitly.
+void cblasXhpmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
+                const size_t n,
+                const float2 alpha,
+                const std::vector<float2>& ap_buffer, const size_t ap_offset,
+                const std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
+                const float2 beta,
+                std::vector<float2>& y_buffer, const size_t y_offset, const size_t y_inc) {
+  // The scalars are handed over as pointers to {real, imag} pairs; two-element stack arrays
+  // are enough for that and avoid a heap allocation on every call.
+  const float alpha_array[2] = {alpha.real(), alpha.imag()};
+  const float beta_array[2] = {beta.real(), beta.imag()};
+  cblas_chpmv(layout, triangle,
+              static_cast<int>(n),
+              alpha_array,
+              reinterpret_cast<const float*>(&ap_buffer[ap_offset]),
+              reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+              beta_array,
+              reinterpret_cast<float*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
+}
+// Double-precision complex overload: forwards to cblas_zhpmv. ap, x and y are passed as
+// pointers to their underlying doubles, and n and the increments are cast to int explicitly.
+void cblasXhpmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
+                const size_t n,
+                const double2 alpha,
+                const std::vector<double2>& ap_buffer, const size_t ap_offset,
+                const std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
+                const double2 beta,
+                std::vector<double2>& y_buffer, const size_t y_offset, const size_t y_inc) {
+  // The scalars are handed over as pointers to {real, imag} pairs; two-element stack arrays
+  // are enough for that and avoid a heap allocation on every call.
+  const double alpha_array[2] = {alpha.real(), alpha.imag()};
+  const double beta_array[2] = {beta.real(), beta.imag()};
+  cblas_zhpmv(layout, triangle,
+              static_cast<int>(n),
+              alpha_array,
+              reinterpret_cast<const double*>(&ap_buffer[ap_offset]),
+              reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+              beta_array,
+              reinterpret_cast<double*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
+}
+
+// Forwards the Netlib BLAS calls for SSYMV/DSYMV
+// Single-precision overload: forwards to cblas_ssymv with a, x and y taken from the host
+// vectors at their offsets. n, a_ld and both increments are cast to int explicitly, since the
+// CBLAS prototypes take int rather than size_t.
+void cblasXsymv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
+                const size_t n,
+                const float alpha,
+                const std::vector<float>& a_buffer, const size_t a_offset, const size_t a_ld,
+                const std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc,
+                const float beta,
+                std::vector<float>& y_buffer, const size_t y_offset, const size_t y_inc) {
+  cblas_ssymv(layout, triangle,
+              static_cast<int>(n),
+              alpha,
+              &a_buffer[a_offset], static_cast<int>(a_ld),
+              &x_buffer[x_offset], static_cast<int>(x_inc),
+              beta,
+              &y_buffer[y_offset], static_cast<int>(y_inc));
+}
+// Double-precision overload: forwards to cblas_dsymv with a, x and y taken from the host
+// vectors at their offsets. n, a_ld and both increments are cast to int explicitly, since the
+// CBLAS prototypes take int rather than size_t.
+void cblasXsymv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
+                const size_t n,
+                const double alpha,
+                const std::vector<double>& a_buffer, const size_t a_offset, const size_t a_ld,
+                const std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc,
+                const double beta,
+                std::vector<double>& y_buffer, const size_t y_offset, const size_t y_inc) {
+  cblas_dsymv(layout, triangle,
+              static_cast<int>(n),
+              alpha,
+              &a_buffer[a_offset], static_cast<int>(a_ld),
+              &x_buffer[x_offset], static_cast<int>(x_inc),
+              beta,
+              &y_buffer[y_offset], static_cast<int>(y_inc));
+}
+
+// Forwards the Netlib BLAS calls for SSBMV/DSBMV
+// Single-precision overload: forwards to cblas_ssbmv with a, x and y taken from the host
+// vectors at their offsets. n, k, a_ld and both increments are cast to int explicitly, since
+// the CBLAS prototypes take int rather than size_t.
+void cblasXsbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
+                const size_t n, const size_t k,
+                const float alpha,
+                const std::vector<float>& a_buffer, const size_t a_offset, const size_t a_ld,
+                const std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc,
+                const float beta,
+                std::vector<float>& y_buffer, const size_t y_offset, const size_t y_inc) {
+  cblas_ssbmv(layout, triangle,
+              static_cast<int>(n), static_cast<int>(k),
+              alpha,
+              &a_buffer[a_offset], static_cast<int>(a_ld),
+              &x_buffer[x_offset], static_cast<int>(x_inc),
+              beta,
+              &y_buffer[y_offset], static_cast<int>(y_inc));
+}
+// Double-precision overload: forwards to cblas_dsbmv with a, x and y taken from the host
+// vectors at their offsets. n, k, a_ld and both increments are cast to int explicitly, since
+// the CBLAS prototypes take int rather than size_t.
+void cblasXsbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
+                const size_t n, const size_t k,
+                const double alpha,
+                const std::vector<double>& a_buffer, const size_t a_offset, const size_t a_ld,
+                const std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc,
+                const double beta,
+                std::vector<double>& y_buffer, const size_t y_offset, const size_t y_inc) {
+  cblas_dsbmv(layout, triangle,
+              static_cast<int>(n), static_cast<int>(k),
+              alpha,
+              &a_buffer[a_offset], static_cast<int>(a_ld),
+              &x_buffer[x_offset], static_cast<int>(x_inc),
+              beta,
+              &y_buffer[y_offset], static_cast<int>(y_inc));
+}
+
+// Forwards the Netlib BLAS calls for SSPMV/DSPMV
+// Single-precision overload: forwards to cblas_sspmv with ap, x and y taken from the host
+// vectors at their offsets. n and both increments are cast to int explicitly, since the CBLAS
+// prototypes take int rather than size_t.
+void cblasXspmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
+                const size_t n,
+                const float alpha,
+                const std::vector<float>& ap_buffer, const size_t ap_offset,
+                const std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc,
+                const float beta,
+                std::vector<float>& y_buffer, const size_t y_offset, const size_t y_inc) {
+  cblas_sspmv(layout, triangle,
+              static_cast<int>(n),
+              alpha,
+              &ap_buffer[ap_offset],
+              &x_buffer[x_offset], static_cast<int>(x_inc),
+              beta,
+              &y_buffer[y_offset], static_cast<int>(y_inc));
+}
+// Double-precision overload: forwards to cblas_dspmv with ap, x and y taken from the host
+// vectors at their offsets. n and both increments are cast to int explicitly, since the CBLAS
+// prototypes take int rather than size_t.
+void cblasXspmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
+                const size_t n,
+                const double alpha,
+                const std::vector<double>& ap_buffer, const size_t ap_offset,
+                const std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc,
+                const double beta,
+                std::vector<double>& y_buffer, const size_t y_offset, const size_t y_inc) {
+  cblas_dspmv(layout, triangle,
+              static_cast<int>(n),
+              alpha,
+              &ap_buffer[ap_offset],
+              &x_buffer[x_offset], static_cast<int>(x_inc),
+              beta,
+              &y_buffer[y_offset], static_cast<int>(y_inc));
+}
+
+// Forwards the Netlib BLAS calls for STRMV/DTRMV/CTRMV/ZTRMV
+// Single-precision real overload: forwards to cblas_strmv; x_buffer is the non-const argument
+// handed to the library. n, a_ld and x_inc are cast to int explicitly, since the CBLAS
+// prototypes take int rather than size_t.
+void cblasXtrmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
+                const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
+                const size_t n,
+                const std::vector<float>& a_buffer, const size_t a_offset, const size_t a_ld,
+                std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc) {
+  cblas_strmv(layout, triangle, a_transpose, diagonal,
+              static_cast<int>(n),
+              &a_buffer[a_offset], static_cast<int>(a_ld),
+              &x_buffer[x_offset], static_cast<int>(x_inc));
+}
+// Double-precision real overload: forwards to cblas_dtrmv; x_buffer is the non-const argument
+// handed to the library. n, a_ld and x_inc are cast to int explicitly, since the CBLAS
+// prototypes take int rather than size_t.
+void cblasXtrmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
+                const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
+                const size_t n,
+                const std::vector<double>& a_buffer, const size_t a_offset, const size_t a_ld,
+                std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc) {
+  cblas_dtrmv(layout, triangle, a_transpose, diagonal,
+              static_cast<int>(n),
+              &a_buffer[a_offset], static_cast<int>(a_ld),
+              &x_buffer[x_offset], static_cast<int>(x_inc));
+}
+// Single-precision complex overload: forwards to cblas_ctrmv, passing a and x as pointers to
+// their underlying floats. n, a_ld and x_inc are cast to int explicitly, since the CBLAS
+// prototypes take int rather than size_t.
+void cblasXtrmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
+                const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
+                const size_t n,
+                const std::vector<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
+                std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc) {
+  cblas_ctrmv(layout, triangle, a_transpose, diagonal,
+              static_cast<int>(n),
+              reinterpret_cast<const float*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+              reinterpret_cast<float*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
+}
+// Double-precision complex overload: forwards to cblas_ztrmv, passing a and x as pointers to
+// their underlying doubles. n, a_ld and x_inc are cast to int explicitly, since the CBLAS
+// prototypes take int rather than size_t.
+void cblasXtrmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
+                const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
+                const size_t n,
+                const std::vector<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
+                std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc) {
+  cblas_ztrmv(layout, triangle, a_transpose, diagonal,
+              static_cast<int>(n),
+              reinterpret_cast<const double*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+              reinterpret_cast<double*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
+}
+
+// Forwards the Netlib BLAS calls for STBMV/DTBMV/CTBMV/ZTBMV
+// Single-precision real overload: forwards to cblas_stbmv; x_buffer is the non-const argument
+// handed to the library. n, k, a_ld and x_inc are cast to int explicitly, since the CBLAS
+// prototypes take int rather than size_t.
+void cblasXtbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
+                const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
+                const size_t n, const size_t k,
+                const std::vector<float>& a_buffer, const size_t a_offset, const size_t a_ld,
+                std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc) {
+  cblas_stbmv(layout, triangle, a_transpose, diagonal,
+              static_cast<int>(n), static_cast<int>(k),
+              &a_buffer[a_offset], static_cast<int>(a_ld),
+              &x_buffer[x_offset], static_cast<int>(x_inc));
+}
+// Double-precision real overload: forwards to cblas_dtbmv; x_buffer is the non-const argument
+// handed to the library. n, k, a_ld and x_inc are cast to int explicitly, since the CBLAS
+// prototypes take int rather than size_t.
+void cblasXtbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
+                const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
+                const size_t n, const size_t k,
+                const std::vector<double>& a_buffer, const size_t a_offset, const size_t a_ld,
+                std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc) {
+  cblas_dtbmv(layout, triangle, a_transpose, diagonal,
+              static_cast<int>(n), static_cast<int>(k),
+              &a_buffer[a_offset], static_cast<int>(a_ld),
+              &x_buffer[x_offset], static_cast<int>(x_inc));
+}
+// Single-precision complex overload: forwards to cblas_ctbmv, passing a and x as pointers to
+// their underlying floats. n, k, a_ld and x_inc are cast to int explicitly, since the CBLAS
+// prototypes take int rather than size_t.
+void cblasXtbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
+                const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
+                const size_t n, const size_t k,
+                const std::vector<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
+                std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc) {
+  cblas_ctbmv(layout, triangle, a_transpose, diagonal,
+              static_cast<int>(n), static_cast<int>(k),
+              reinterpret_cast<const float*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+              reinterpret_cast<float*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
+}
+// Double-precision complex overload: forwards to cblas_ztbmv, passing a and x as pointers to
+// their underlying doubles. n, k, a_ld and x_inc are cast to int explicitly, since the CBLAS
+// prototypes take int rather than size_t.
+void cblasXtbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
+                const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
+                const size_t n, const size_t k,
+                const std::vector<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
+                std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc) {
+  cblas_ztbmv(layout, triangle, a_transpose, diagonal,
+              static_cast<int>(n), static_cast<int>(k),
+              reinterpret_cast<const double*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+              reinterpret_cast<double*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
+}
+
+// Forwards the Netlib BLAS calls for STPMV/DTPMV/CTPMV/ZTPMV
+// Single-precision real overload: forwards to cblas_stpmv; x_buffer is the non-const argument
+// handed to the library. n and x_inc are cast to int explicitly, since the CBLAS prototypes
+// take int rather than size_t.
+void cblasXtpmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
+                const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
+                const size_t n,
+                const std::vector<float>& ap_buffer, const size_t ap_offset,
+                std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc) {
+  cblas_stpmv(layout, triangle, a_transpose, diagonal,
+              static_cast<int>(n),
+              &ap_buffer[ap_offset],
+              &x_buffer[x_offset], static_cast<int>(x_inc));
+}
+// Double-precision real overload: forwards to cblas_dtpmv; x_buffer is the non-const argument
+// handed to the library. n and x_inc are cast to int explicitly, since the CBLAS prototypes
+// take int rather than size_t.
+void cblasXtpmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
+                const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
+                const size_t n,
+                const std::vector<double>& ap_buffer, const size_t ap_offset,
+                std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc) {
+  cblas_dtpmv(layout, triangle, a_transpose, diagonal,
+              static_cast<int>(n),
+              &ap_buffer[ap_offset],
+              &x_buffer[x_offset], static_cast<int>(x_inc));
+}
+// Single-precision complex overload: forwards to cblas_ctpmv, passing ap and x as pointers to
+// their underlying floats. n and x_inc are cast to int explicitly, since the CBLAS prototypes
+// take int rather than size_t.
+void cblasXtpmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
+                const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
+                const size_t n,
+                const std::vector<float2>& ap_buffer, const size_t ap_offset,
+                std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc) {
+  cblas_ctpmv(layout, triangle, a_transpose, diagonal,
+              static_cast<int>(n),
+              reinterpret_cast<const float*>(&ap_buffer[ap_offset]),
+              reinterpret_cast<float*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
+}
+// Double-precision complex overload: forwards to cblas_ztpmv, passing ap and x as pointers to
+// their underlying doubles. n and x_inc are cast to int explicitly, since the CBLAS prototypes
+// take int rather than size_t.
+void cblasXtpmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
+                const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
+                const size_t n,
+                const std::vector<double2>& ap_buffer, const size_t ap_offset,
+                std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc) {
+  cblas_ztpmv(layout, triangle, a_transpose, diagonal,
+              static_cast<int>(n),
+              reinterpret_cast<const double*>(&ap_buffer[ap_offset]),
+              reinterpret_cast<double*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
+}
+
+// Forwards the Netlib BLAS calls for STRSV/DTRSV/CTRSV/ZTRSV
+// Single-precision real overload: forwards to cblas_strsv; x_buffer is the non-const argument
+// handed to the library. n, a_ld and x_inc are cast to int explicitly, since the CBLAS
+// prototypes take int rather than size_t.
+void cblasXtrsv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
+                const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
+                const size_t n,
+                const std::vector<float>& a_buffer, const size_t a_offset, const size_t a_ld,
+                std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc) {
+  cblas_strsv(layout, triangle, a_transpose, diagonal,
+              static_cast<int>(n),
+              &a_buffer[a_offset], static_cast<int>(a_ld),
+              &x_buffer[x_offset], static_cast<int>(x_inc));
+}
+// Double-precision real overload: forwards to cblas_dtrsv; x_buffer is the non-const argument
+// handed to the library. n, a_ld and x_inc are cast to int explicitly, since the CBLAS
+// prototypes take int rather than size_t.
+void cblasXtrsv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
+                const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
+                const size_t n,
+                const std::vector<double>& a_buffer, const size_t a_offset, const size_t a_ld,
+                std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc) {
+  cblas_dtrsv(layout, triangle, a_transpose, diagonal,
+              static_cast<int>(n),
+              &a_buffer[a_offset], static_cast<int>(a_ld),
+              &x_buffer[x_offset], static_cast<int>(x_inc));
+}
+// Single-precision complex overload: forwards to cblas_ctrsv, passing a and x as pointers to
+// their underlying floats. n, a_ld and x_inc are cast to int explicitly, since the CBLAS
+// prototypes take int rather than size_t.
+void cblasXtrsv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
+                const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
+                const size_t n,
+                const std::vector<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
+                std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc) {
+  cblas_ctrsv(layout, triangle, a_transpose, diagonal,
+              static_cast<int>(n),
+              reinterpret_cast<const float*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+              reinterpret_cast<float*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
+}
+// Double-precision complex overload: forwards to cblas_ztrsv, passing a and x as pointers to
+// their underlying doubles. n, a_ld and x_inc are cast to int explicitly, since the CBLAS
+// prototypes take int rather than size_t.
+void cblasXtrsv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
+                const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
+                const size_t n,
+                const std::vector<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
+                std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc) {
+  cblas_ztrsv(layout, triangle, a_transpose, diagonal,
+              static_cast<int>(n),
+              reinterpret_cast<const double*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+              reinterpret_cast<double*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
+}
+
+// Forwards the Netlib BLAS calls for STBSV/DTBSV/CTBSV/ZTBSV
+// Single-precision real overload: forwards to cblas_stbsv; x_buffer is the non-const argument
+// handed to the library. n, k, a_ld and x_inc are cast to int explicitly, since the CBLAS
+// prototypes take int rather than size_t.
+void cblasXtbsv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
+                const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
+                const size_t n, const size_t k,
+                const std::vector<float>& a_buffer, const size_t a_offset, const size_t a_ld,
+                std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc) {
+  cblas_stbsv(layout, triangle, a_transpose, diagonal,
+              static_cast<int>(n), static_cast<int>(k),
+              &a_buffer[a_offset], static_cast<int>(a_ld),
+              &x_buffer[x_offset], static_cast<int>(x_inc));
+}
+// Double-precision real overload: forwards to cblas_dtbsv; x_buffer is the non-const argument
+// handed to the library. n, k, a_ld and x_inc are cast to int explicitly, since the CBLAS
+// prototypes take int rather than size_t.
+void cblasXtbsv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
+                const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
+                const size_t n, const size_t k,
+                const std::vector<double>& a_buffer, const size_t a_offset, const size_t a_ld,
+                std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc) {
+  cblas_dtbsv(layout, triangle, a_transpose, diagonal,
+              static_cast<int>(n), static_cast<int>(k),
+              &a_buffer[a_offset], static_cast<int>(a_ld),
+              &x_buffer[x_offset], static_cast<int>(x_inc));
+}
+// Single-precision complex overload: forwards to cblas_ctbsv, passing a and x as pointers to
+// their underlying floats. n, k, a_ld and x_inc are cast to int explicitly, since the CBLAS
+// prototypes take int rather than size_t.
+void cblasXtbsv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
+                const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
+                const size_t n, const size_t k,
+                const std::vector<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
+                std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc) {
+  cblas_ctbsv(layout, triangle, a_transpose, diagonal,
+              static_cast<int>(n), static_cast<int>(k),
+              reinterpret_cast<const float*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+              reinterpret_cast<float*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
+}
+// Double-precision complex overload: forwards to cblas_ztbsv, passing a and x as pointers to
+// their underlying doubles. n, k, a_ld and x_inc are cast to int explicitly, since the CBLAS
+// prototypes take int rather than size_t.
+void cblasXtbsv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
+                const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
+                const size_t n, const size_t k,
+                const std::vector<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
+                std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc) {
+  cblas_ztbsv(layout, triangle, a_transpose, diagonal,
+              static_cast<int>(n), static_cast<int>(k),
+              reinterpret_cast<const double*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+              reinterpret_cast<double*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
+}
+
+// Forwards the Netlib BLAS calls for STPSV/DTPSV/CTPSV/ZTPSV
+// Single-precision real overload: forwards to cblas_stpsv; x_buffer is the non-const argument
+// handed to the library. n and x_inc are cast to int explicitly, since the CBLAS prototypes
+// take int rather than size_t.
+void cblasXtpsv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
+                const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
+                const size_t n,
+                const std::vector<float>& ap_buffer, const size_t ap_offset,
+                std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc) {
+  cblas_stpsv(layout, triangle, a_transpose, diagonal,
+              static_cast<int>(n),
+              &ap_buffer[ap_offset],
+              &x_buffer[x_offset], static_cast<int>(x_inc));
+}
+// Double-precision real overload: forwards to cblas_dtpsv; x_buffer is the non-const argument
+// handed to the library. n and x_inc are cast to int explicitly, since the CBLAS prototypes
+// take int rather than size_t.
+void cblasXtpsv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
+                const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
+                const size_t n,
+                const std::vector<double>& ap_buffer, const size_t ap_offset,
+                std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc) {
+  cblas_dtpsv(layout, triangle, a_transpose, diagonal,
+              static_cast<int>(n),
+              &ap_buffer[ap_offset],
+              &x_buffer[x_offset], static_cast<int>(x_inc));
+}
+// Single-precision complex overload: forwards to cblas_ctpsv, passing ap and x as pointers to
+// their underlying floats. n and x_inc are cast to int explicitly, since the CBLAS prototypes
+// take int rather than size_t.
+void cblasXtpsv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
+                const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
+                const size_t n,
+                const std::vector<float2>& ap_buffer, const size_t ap_offset,
+                std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc) {
+  cblas_ctpsv(layout, triangle, a_transpose, diagonal,
+              static_cast<int>(n),
+              reinterpret_cast<const float*>(&ap_buffer[ap_offset]),
+              reinterpret_cast<float*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
+}
+// Double-precision complex overload: forwards to cblas_ztpsv, passing ap and x as pointers to
+// their underlying doubles. n and x_inc are cast to int explicitly, since the CBLAS prototypes
+// take int rather than size_t.
+void cblasXtpsv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
+                const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
+                const size_t n,
+                const std::vector<double2>& ap_buffer, const size_t ap_offset,
+                std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc) {
+  cblas_ztpsv(layout, triangle, a_transpose, diagonal,
+              static_cast<int>(n),
+              reinterpret_cast<const double*>(&ap_buffer[ap_offset]),
+              reinterpret_cast<double*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
+}
+
+// Forwards the Netlib BLAS calls for SGER/DGER
+// Single-precision overload: forwards to cblas_sger; a_buffer is the non-const argument handed
+// to the library. m, n, a_ld and both increments are cast to int explicitly, since the CBLAS
+// prototypes take int rather than size_t.
+void cblasXger(const CBLAS_ORDER layout,
+               const size_t m, const size_t n,
+               const float alpha,
+               const std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc,
+               const std::vector<float>& y_buffer, const size_t y_offset, const size_t y_inc,
+               std::vector<float>& a_buffer, const size_t a_offset, const size_t a_ld) {
+  cblas_sger(layout,
+             static_cast<int>(m), static_cast<int>(n),
+             alpha,
+             &x_buffer[x_offset], static_cast<int>(x_inc),
+             &y_buffer[y_offset], static_cast<int>(y_inc),
+             &a_buffer[a_offset], static_cast<int>(a_ld));
+}
+// Double-precision overload: forwards to cblas_dger; a_buffer is the non-const argument handed
+// to the library. m, n, a_ld and both increments are cast to int explicitly, since the CBLAS
+// prototypes take int rather than size_t.
+void cblasXger(const CBLAS_ORDER layout,
+               const size_t m, const size_t n,
+               const double alpha,
+               const std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc,
+               const std::vector<double>& y_buffer, const size_t y_offset, const size_t y_inc,
+               std::vector<double>& a_buffer, const size_t a_offset, const size_t a_ld) {
+  cblas_dger(layout,
+             static_cast<int>(m), static_cast<int>(n),
+             alpha,
+             &x_buffer[x_offset], static_cast<int>(x_inc),
+             &y_buffer[y_offset], static_cast<int>(y_inc),
+             &a_buffer[a_offset], static_cast<int>(a_ld));
+}
+
+// Forwards the Netlib BLAS calls for CGERU/ZGERU
+// Single-precision complex overload: forwards to cblas_cgeru, passing x, y and a as pointers
+// to their underlying floats. m, n, a_ld and both increments are cast to int explicitly.
+void cblasXgeru(const CBLAS_ORDER layout,
+                const size_t m, const size_t n,
+                const float2 alpha,
+                const std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
+                const std::vector<float2>& y_buffer, const size_t y_offset, const size_t y_inc,
+                std::vector<float2>& a_buffer, const size_t a_offset, const size_t a_ld) {
+  // alpha is handed over as a pointer to a {real, imag} pair; a two-element stack array is
+  // enough for that and avoids a heap allocation on every call.
+  const float alpha_array[2] = {alpha.real(), alpha.imag()};
+  cblas_cgeru(layout,
+              static_cast<int>(m), static_cast<int>(n),
+              alpha_array,
+              reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+              reinterpret_cast<const float*>(&y_buffer[y_offset]), static_cast<int>(y_inc),
+              reinterpret_cast<float*>(&a_buffer[a_offset]), static_cast<int>(a_ld));
+}
+// Double-precision complex overload: forwards to cblas_zgeru, passing x, y and a as pointers
+// to their underlying doubles. m, n, a_ld and both increments are cast to int explicitly.
+void cblasXgeru(const CBLAS_ORDER layout,
+                const size_t m, const size_t n,
+                const double2 alpha,
+                const std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
+                const std::vector<double2>& y_buffer, const size_t y_offset, const size_t y_inc,
+                std::vector<double2>& a_buffer, const size_t a_offset, const size_t a_ld) {
+  // alpha is handed over as a pointer to a {real, imag} pair; a two-element stack array is
+  // enough for that and avoids a heap allocation on every call.
+  const double alpha_array[2] = {alpha.real(), alpha.imag()};
+  cblas_zgeru(layout,
+              static_cast<int>(m), static_cast<int>(n),
+              alpha_array,
+              reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+              reinterpret_cast<const double*>(&y_buffer[y_offset]), static_cast<int>(y_inc),
+              reinterpret_cast<double*>(&a_buffer[a_offset]), static_cast<int>(a_ld));
+}
+
+// Forwards the Netlib BLAS calls for CGERC/ZGERC
+// Single-precision complex overload: forwards to cblas_cgerc, passing x, y and a as pointers
+// to their underlying floats. m, n, a_ld and both increments are cast to int explicitly.
+void cblasXgerc(const CBLAS_ORDER layout,
+                const size_t m, const size_t n,
+                const float2 alpha,
+                const std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
+                const std::vector<float2>& y_buffer, const size_t y_offset, const size_t y_inc,
+                std::vector<float2>& a_buffer, const size_t a_offset, const size_t a_ld) {
+  // alpha is handed over as a pointer to a {real, imag} pair; a two-element stack array is
+  // enough for that and avoids a heap allocation on every call.
+  const float alpha_array[2] = {alpha.real(), alpha.imag()};
+  cblas_cgerc(layout,
+              static_cast<int>(m), static_cast<int>(n),
+              alpha_array,
+              reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+              reinterpret_cast<const float*>(&y_buffer[y_offset]), static_cast<int>(y_inc),
+              reinterpret_cast<float*>(&a_buffer[a_offset]), static_cast<int>(a_ld));
+}
+// Double-precision complex overload: forwards to cblas_zgerc, passing x, y and a as pointers
+// to their underlying doubles. m, n, a_ld and both increments are cast to int explicitly.
+void cblasXgerc(const CBLAS_ORDER layout,
+                const size_t m, const size_t n,
+                const double2 alpha,
+                const std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
+                const std::vector<double2>& y_buffer, const size_t y_offset, const size_t y_inc,
+                std::vector<double2>& a_buffer, const size_t a_offset, const size_t a_ld) {
+  // alpha is handed over as a pointer to a {real, imag} pair; a two-element stack array is
+  // enough for that and avoids a heap allocation on every call.
+  const double alpha_array[2] = {alpha.real(), alpha.imag()};
+  cblas_zgerc(layout,
+              static_cast<int>(m), static_cast<int>(n),
+              alpha_array,
+              reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+              reinterpret_cast<const double*>(&y_buffer[y_offset]), static_cast<int>(y_inc),
+              reinterpret_cast<double*>(&a_buffer[a_offset]), static_cast<int>(a_ld));
+}
+
+// Forwards the Netlib BLAS calls for CHER/ZHER
+// Single-precision complex overload: forwards to cblas_cher with a real-valued alpha, passing
+// x and a as pointers to their underlying floats. n, a_ld and x_inc are cast to int explicitly,
+// since the CBLAS prototypes take int rather than size_t.
+void cblasXher(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
+               const size_t n,
+               const float alpha,
+               const std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
+               std::vector<float2>& a_buffer, const size_t a_offset, const size_t a_ld) {
+  cblas_cher(layout, triangle,
+             static_cast<int>(n),
+             alpha,
+             reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+             reinterpret_cast<float*>(&a_buffer[a_offset]), static_cast<int>(a_ld));
+}
+// Double-precision complex overload: forwards to cblas_zher with a real-valued alpha, passing
+// x and a as pointers to their underlying doubles. n, a_ld and x_inc are cast to int
+// explicitly, since the CBLAS prototypes take int rather than size_t.
+void cblasXher(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
+               const size_t n,
+               const double alpha,
+               const std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
+               std::vector<double2>& a_buffer, const size_t a_offset, const size_t a_ld) {
+  cblas_zher(layout, triangle,
+             static_cast<int>(n),
+             alpha,
+             reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+             reinterpret_cast<double*>(&a_buffer[a_offset]), static_cast<int>(a_ld));
+}
+
+// Forwards the Netlib BLAS calls for CHPR/ZHPR
+// Single-precision overload: forwards to cblas_chpr, addressing x and AP from their element offsets.
+// The float2 buffers are reinterpreted as float pointers; n is now cast to int like x_inc.
+void cblasXhpr(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
+               const size_t n,
+               const float alpha,
+               const std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
+               std::vector<float2>& ap_buffer, const size_t ap_offset) {
+  cblas_chpr(layout, triangle,
+             static_cast<int>(n),
+             alpha,
+             reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+             reinterpret_cast<float*>(&ap_buffer[ap_offset]));
+}
+// Double-precision overload: forwards to cblas_zhpr, addressing x and AP from their element offsets.
+// The double2 buffers are reinterpreted as double pointers; n is now cast to int like x_inc.
+void cblasXhpr(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
+               const size_t n,
+               const double alpha,
+               const std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
+               std::vector<double2>& ap_buffer, const size_t ap_offset) {
+  cblas_zhpr(layout, triangle,
+             static_cast<int>(n),
+             alpha,
+             reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+             reinterpret_cast<double*>(&ap_buffer[ap_offset]));
+}
+
+// Forwards the Netlib BLAS calls for CHER2/ZHER2
+// Single-precision complex overload: forwards to cblas_cher2, addressing x, y and A from their
+// element offsets. n and a_ld are now cast to int explicitly, matching x_inc/y_inc.
+void cblasXher2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
+                const size_t n,
+                const float2 alpha,
+                const std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
+                const std::vector<float2>& y_buffer, const size_t y_offset, const size_t y_inc,
+                std::vector<float2>& a_buffer, const size_t a_offset, const size_t a_ld) {
+  // Complex scalar is passed as a {real, imag} pair; a stack array avoids a heap allocation per call
+  const float alpha_array[2] = {alpha.real(), alpha.imag()};
+  cblas_cher2(layout, triangle,
+              static_cast<int>(n),
+              alpha_array,
+              reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+              reinterpret_cast<const float*>(&y_buffer[y_offset]), static_cast<int>(y_inc),
+              reinterpret_cast<float*>(&a_buffer[a_offset]), static_cast<int>(a_ld));
+}
+// Double-precision complex overload: forwards to cblas_zher2, addressing x, y and A from their
+// element offsets. n and a_ld are now cast to int explicitly, matching x_inc/y_inc.
+void cblasXher2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
+                const size_t n,
+                const double2 alpha,
+                const std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
+                const std::vector<double2>& y_buffer, const size_t y_offset, const size_t y_inc,
+                std::vector<double2>& a_buffer, const size_t a_offset, const size_t a_ld) {
+  // Complex scalar is passed as a {real, imag} pair; a stack array avoids a heap allocation per call
+  const double alpha_array[2] = {alpha.real(), alpha.imag()};
+  cblas_zher2(layout, triangle,
+              static_cast<int>(n),
+              alpha_array,
+              reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+              reinterpret_cast<const double*>(&y_buffer[y_offset]), static_cast<int>(y_inc),
+              reinterpret_cast<double*>(&a_buffer[a_offset]), static_cast<int>(a_ld));
+}
+
+// Forwards the Netlib BLAS calls for CHPR2/ZHPR2
+// Single-precision complex overload: forwards to cblas_chpr2, addressing x, y and AP from their
+// element offsets. n is now cast to int explicitly, matching x_inc/y_inc.
+void cblasXhpr2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
+                const size_t n,
+                const float2 alpha,
+                const std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
+                const std::vector<float2>& y_buffer, const size_t y_offset, const size_t y_inc,
+                std::vector<float2>& ap_buffer, const size_t ap_offset) {
+  // Complex scalar is passed as a {real, imag} pair; a stack array avoids a heap allocation per call
+  const float alpha_array[2] = {alpha.real(), alpha.imag()};
+  cblas_chpr2(layout, triangle,
+              static_cast<int>(n),
+              alpha_array,
+              reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+              reinterpret_cast<const float*>(&y_buffer[y_offset]), static_cast<int>(y_inc),
+              reinterpret_cast<float*>(&ap_buffer[ap_offset]));
+}
+// Double-precision complex overload: forwards to cblas_zhpr2, addressing x, y and AP from their
+// element offsets. n is now cast to int explicitly, matching x_inc/y_inc.
+void cblasXhpr2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
+                const size_t n,
+                const double2 alpha,
+                const std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
+                const std::vector<double2>& y_buffer, const size_t y_offset, const size_t y_inc,
+                std::vector<double2>& ap_buffer, const size_t ap_offset) {
+  // Complex scalar is passed as a {real, imag} pair; a stack array avoids a heap allocation per call
+  const double alpha_array[2] = {alpha.real(), alpha.imag()};
+  cblas_zhpr2(layout, triangle,
+              static_cast<int>(n),
+              alpha_array,
+              reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+              reinterpret_cast<const double*>(&y_buffer[y_offset]), static_cast<int>(y_inc),
+              reinterpret_cast<double*>(&ap_buffer[ap_offset]));
+}
+
+// Forwards the Netlib BLAS calls for SSYR/DSYR
+// Single-precision overload: forwards to cblas_ssyr, addressing x and A from their element offsets.
+// n and a_ld are now cast to int explicitly, matching the existing cast on x_inc.
+void cblasXsyr(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
+               const size_t n,
+               const float alpha,
+               const std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc,
+               std::vector<float>& a_buffer, const size_t a_offset, const size_t a_ld) {
+  cblas_ssyr(layout, triangle,
+             static_cast<int>(n),
+             alpha,
+             &x_buffer[x_offset], static_cast<int>(x_inc),
+             &a_buffer[a_offset], static_cast<int>(a_ld));
+}
+// Double-precision overload: forwards to cblas_dsyr, addressing x and A from their element offsets.
+// n and a_ld are now cast to int explicitly, matching the existing cast on x_inc.
+void cblasXsyr(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
+               const size_t n,
+               const double alpha,
+               const std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc,
+               std::vector<double>& a_buffer, const size_t a_offset, const size_t a_ld) {
+  cblas_dsyr(layout, triangle,
+             static_cast<int>(n),
+             alpha,
+             &x_buffer[x_offset], static_cast<int>(x_inc),
+             &a_buffer[a_offset], static_cast<int>(a_ld));
+}
+
+// Forwards the Netlib BLAS calls for SSPR/DSPR
+// Single-precision overload: forwards to cblas_sspr, addressing x and AP from their element offsets.
+// n is now cast to int explicitly, matching the existing cast on x_inc.
+void cblasXspr(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
+               const size_t n,
+               const float alpha,
+               const std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc,
+               std::vector<float>& ap_buffer, const size_t ap_offset) {
+  cblas_sspr(layout, triangle,
+             static_cast<int>(n),
+             alpha,
+             &x_buffer[x_offset], static_cast<int>(x_inc),
+             &ap_buffer[ap_offset]);
+}
+// Double-precision overload: forwards to cblas_dspr, addressing x and AP from their element offsets.
+// n is now cast to int explicitly, matching the existing cast on x_inc.
+void cblasXspr(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
+               const size_t n,
+               const double alpha,
+               const std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc,
+               std::vector<double>& ap_buffer, const size_t ap_offset) {
+  cblas_dspr(layout, triangle,
+             static_cast<int>(n),
+             alpha,
+             &x_buffer[x_offset], static_cast<int>(x_inc),
+             &ap_buffer[ap_offset]);
+}
+
+// Forwards the Netlib BLAS calls for SSYR2/DSYR2
+// Single-precision overload: forwards to cblas_ssyr2, addressing x, y and A from their element
+// offsets. n and a_ld are now cast to int explicitly, matching x_inc/y_inc.
+void cblasXsyr2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
+                const size_t n,
+                const float alpha,
+                const std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc,
+                const std::vector<float>& y_buffer, const size_t y_offset, const size_t y_inc,
+                std::vector<float>& a_buffer, const size_t a_offset, const size_t a_ld) {
+  cblas_ssyr2(layout, triangle,
+              static_cast<int>(n),
+              alpha,
+              &x_buffer[x_offset], static_cast<int>(x_inc),
+              &y_buffer[y_offset], static_cast<int>(y_inc),
+              &a_buffer[a_offset], static_cast<int>(a_ld));
+}
+// Double-precision overload: forwards to cblas_dsyr2, addressing x, y and A from their element
+// offsets. n and a_ld are now cast to int explicitly, matching x_inc/y_inc.
+void cblasXsyr2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
+                const size_t n,
+                const double alpha,
+                const std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc,
+                const std::vector<double>& y_buffer, const size_t y_offset, const size_t y_inc,
+                std::vector<double>& a_buffer, const size_t a_offset, const size_t a_ld) {
+  cblas_dsyr2(layout, triangle,
+              static_cast<int>(n),
+              alpha,
+              &x_buffer[x_offset], static_cast<int>(x_inc),
+              &y_buffer[y_offset], static_cast<int>(y_inc),
+              &a_buffer[a_offset], static_cast<int>(a_ld));
+}
+
+// Forwards the Netlib BLAS calls for SSPR2/DSPR2
+// Single-precision overload: forwards to cblas_sspr2, addressing x, y and AP from their element
+// offsets. n is now cast to int explicitly, matching x_inc/y_inc.
+void cblasXspr2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
+                const size_t n,
+                const float alpha,
+                const std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc,
+                const std::vector<float>& y_buffer, const size_t y_offset, const size_t y_inc,
+                std::vector<float>& ap_buffer, const size_t ap_offset) {
+  cblas_sspr2(layout, triangle,
+              static_cast<int>(n),
+              alpha,
+              &x_buffer[x_offset], static_cast<int>(x_inc),
+              &y_buffer[y_offset], static_cast<int>(y_inc),
+              &ap_buffer[ap_offset]);
+}
+// Double-precision overload: forwards to cblas_dspr2, addressing x, y and AP from their element
+// offsets. n is now cast to int explicitly, matching x_inc/y_inc.
+void cblasXspr2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
+                const size_t n,
+                const double alpha,
+                const std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc,
+                const std::vector<double>& y_buffer, const size_t y_offset, const size_t y_inc,
+                std::vector<double>& ap_buffer, const size_t ap_offset) {
+  cblas_dspr2(layout, triangle,
+              static_cast<int>(n),
+              alpha,
+              &x_buffer[x_offset], static_cast<int>(x_inc),
+              &y_buffer[y_offset], static_cast<int>(y_inc),
+              &ap_buffer[ap_offset]);
+}
+
+// =================================================================================================
+// BLAS level-3 (matrix-matrix) routines
+// =================================================================================================
+
+// Forwards the Netlib BLAS calls for SGEMM/DGEMM/CGEMM/ZGEMM
+// Single-precision overload: forwards to cblas_sgemm, addressing A, B and C from their element
+// offsets. size_t sizes and leading dimensions are converted to int explicitly, not implicitly.
+void cblasXgemm(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose, const CBLAS_TRANSPOSE b_transpose,
+                const size_t m, const size_t n, const size_t k,
+                const float alpha,
+                const std::vector<float>& a_buffer, const size_t a_offset, const size_t a_ld,
+                const std::vector<float>& b_buffer, const size_t b_offset, const size_t b_ld,
+                const float beta,
+                std::vector<float>& c_buffer, const size_t c_offset, const size_t c_ld) {
+  cblas_sgemm(layout, a_transpose, b_transpose,
+              static_cast<int>(m), static_cast<int>(n), static_cast<int>(k),
+              alpha,
+              &a_buffer[a_offset], static_cast<int>(a_ld),
+              &b_buffer[b_offset], static_cast<int>(b_ld),
+              beta,
+              &c_buffer[c_offset], static_cast<int>(c_ld));
+}
+// Double-precision overload: forwards to cblas_dgemm, addressing A, B and C from their element
+// offsets. size_t sizes and leading dimensions are converted to int explicitly, not implicitly.
+void cblasXgemm(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose, const CBLAS_TRANSPOSE b_transpose,
+                const size_t m, const size_t n, const size_t k,
+                const double alpha,
+                const std::vector<double>& a_buffer, const size_t a_offset, const size_t a_ld,
+                const std::vector<double>& b_buffer, const size_t b_offset, const size_t b_ld,
+                const double beta,
+                std::vector<double>& c_buffer, const size_t c_offset, const size_t c_ld) {
+  cblas_dgemm(layout, a_transpose, b_transpose,
+              static_cast<int>(m), static_cast<int>(n), static_cast<int>(k),
+              alpha,
+              &a_buffer[a_offset], static_cast<int>(a_ld),
+              &b_buffer[b_offset], static_cast<int>(b_ld),
+              beta,
+              &c_buffer[c_offset], static_cast<int>(c_ld));
+}
+// Single-precision complex overload: forwards to cblas_cgemm, addressing A, B and C from their
+// element offsets. size_t sizes and leading dimensions are converted to int explicitly.
+void cblasXgemm(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose, const CBLAS_TRANSPOSE b_transpose,
+                const size_t m, const size_t n, const size_t k,
+                const float2 alpha,
+                const std::vector<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
+                const std::vector<float2>& b_buffer, const size_t b_offset, const size_t b_ld,
+                const float2 beta,
+                std::vector<float2>& c_buffer, const size_t c_offset, const size_t c_ld) {
+  // Complex scalars are passed as {real, imag} pairs; stack arrays avoid heap allocations per call
+  const float alpha_array[2] = {alpha.real(), alpha.imag()};
+  const float beta_array[2] = {beta.real(), beta.imag()};
+  cblas_cgemm(layout, a_transpose, b_transpose,
+              static_cast<int>(m), static_cast<int>(n), static_cast<int>(k),
+              alpha_array,
+              reinterpret_cast<const float*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+              reinterpret_cast<const float*>(&b_buffer[b_offset]), static_cast<int>(b_ld),
+              beta_array,
+              reinterpret_cast<float*>(&c_buffer[c_offset]), static_cast<int>(c_ld));
+}
+// Double-precision complex overload: forwards to cblas_zgemm, addressing A, B and C from their
+// element offsets. size_t sizes and leading dimensions are converted to int explicitly.
+void cblasXgemm(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose, const CBLAS_TRANSPOSE b_transpose,
+                const size_t m, const size_t n, const size_t k,
+                const double2 alpha,
+                const std::vector<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
+                const std::vector<double2>& b_buffer, const size_t b_offset, const size_t b_ld,
+                const double2 beta,
+                std::vector<double2>& c_buffer, const size_t c_offset, const size_t c_ld) {
+  // Complex scalars are passed as {real, imag} pairs; stack arrays avoid heap allocations per call
+  const double alpha_array[2] = {alpha.real(), alpha.imag()};
+  const double beta_array[2] = {beta.real(), beta.imag()};
+  cblas_zgemm(layout, a_transpose, b_transpose,
+              static_cast<int>(m), static_cast<int>(n), static_cast<int>(k),
+              alpha_array,
+              reinterpret_cast<const double*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+              reinterpret_cast<const double*>(&b_buffer[b_offset]), static_cast<int>(b_ld),
+              beta_array,
+              reinterpret_cast<double*>(&c_buffer[c_offset]), static_cast<int>(c_ld));
+}
+
+// Forwards the Netlib BLAS calls for SSYMM/DSYMM/CSYMM/ZSYMM
+// Single-precision overload: forwards to cblas_ssymm, addressing A, B and C from their element
+// offsets. size_t sizes and leading dimensions are converted to int explicitly, not implicitly.
+void cblasXsymm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPLO triangle,
+                const size_t m, const size_t n,
+                const float alpha,
+                const std::vector<float>& a_buffer, const size_t a_offset, const size_t a_ld,
+                const std::vector<float>& b_buffer, const size_t b_offset, const size_t b_ld,
+                const float beta,
+                std::vector<float>& c_buffer, const size_t c_offset, const size_t c_ld) {
+  cblas_ssymm(layout, side, triangle,
+              static_cast<int>(m), static_cast<int>(n),
+              alpha,
+              &a_buffer[a_offset], static_cast<int>(a_ld),
+              &b_buffer[b_offset], static_cast<int>(b_ld),
+              beta,
+              &c_buffer[c_offset], static_cast<int>(c_ld));
+}
+// Double-precision overload: forwards to cblas_dsymm, addressing A, B and C from their element
+// offsets. size_t sizes and leading dimensions are converted to int explicitly, not implicitly.
+void cblasXsymm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPLO triangle,
+                const size_t m, const size_t n,
+                const double alpha,
+                const std::vector<double>& a_buffer, const size_t a_offset, const size_t a_ld,
+                const std::vector<double>& b_buffer, const size_t b_offset, const size_t b_ld,
+                const double beta,
+                std::vector<double>& c_buffer, const size_t c_offset, const size_t c_ld) {
+  cblas_dsymm(layout, side, triangle,
+              static_cast<int>(m), static_cast<int>(n),
+              alpha,
+              &a_buffer[a_offset], static_cast<int>(a_ld),
+              &b_buffer[b_offset], static_cast<int>(b_ld),
+              beta,
+              &c_buffer[c_offset], static_cast<int>(c_ld));
+}
+// Single-precision complex overload: forwards to cblas_csymm, addressing A, B and C from their
+// element offsets. size_t sizes and leading dimensions are converted to int explicitly.
+void cblasXsymm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPLO triangle,
+                const size_t m, const size_t n,
+                const float2 alpha,
+                const std::vector<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
+                const std::vector<float2>& b_buffer, const size_t b_offset, const size_t b_ld,
+                const float2 beta,
+                std::vector<float2>& c_buffer, const size_t c_offset, const size_t c_ld) {
+  // Complex scalars are passed as {real, imag} pairs; stack arrays avoid heap allocations per call
+  const float alpha_array[2] = {alpha.real(), alpha.imag()};
+  const float beta_array[2] = {beta.real(), beta.imag()};
+  cblas_csymm(layout, side, triangle,
+              static_cast<int>(m), static_cast<int>(n),
+              alpha_array,
+              reinterpret_cast<const float*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+              reinterpret_cast<const float*>(&b_buffer[b_offset]), static_cast<int>(b_ld),
+              beta_array,
+              reinterpret_cast<float*>(&c_buffer[c_offset]), static_cast<int>(c_ld));
+}
+// Double-precision complex overload: forwards to cblas_zsymm, addressing A, B and C from their
+// element offsets. size_t sizes and leading dimensions are converted to int explicitly.
+void cblasXsymm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPLO triangle,
+                const size_t m, const size_t n,
+                const double2 alpha,
+                const std::vector<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
+                const std::vector<double2>& b_buffer, const size_t b_offset, const size_t b_ld,
+                const double2 beta,
+                std::vector<double2>& c_buffer, const size_t c_offset, const size_t c_ld) {
+  // Complex scalars are passed as {real, imag} pairs; stack arrays avoid heap allocations per call
+  const double alpha_array[2] = {alpha.real(), alpha.imag()};
+  const double beta_array[2] = {beta.real(), beta.imag()};
+  cblas_zsymm(layout, side, triangle,
+              static_cast<int>(m), static_cast<int>(n),
+              alpha_array,
+              reinterpret_cast<const double*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+              reinterpret_cast<const double*>(&b_buffer[b_offset]), static_cast<int>(b_ld),
+              beta_array,
+              reinterpret_cast<double*>(&c_buffer[c_offset]), static_cast<int>(c_ld));
+}
+
+// Forwards the Netlib BLAS calls for CHEMM/ZHEMM
+// Single-precision complex overload: forwards to cblas_chemm, addressing A, B and C from their
+// element offsets. size_t sizes and leading dimensions are converted to int explicitly.
+void cblasXhemm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPLO triangle,
+                const size_t m, const size_t n,
+                const float2 alpha,
+                const std::vector<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
+                const std::vector<float2>& b_buffer, const size_t b_offset, const size_t b_ld,
+                const float2 beta,
+                std::vector<float2>& c_buffer, const size_t c_offset, const size_t c_ld) {
+  // Complex scalars are passed as {real, imag} pairs; stack arrays avoid heap allocations per call
+  const float alpha_array[2] = {alpha.real(), alpha.imag()};
+  const float beta_array[2] = {beta.real(), beta.imag()};
+  cblas_chemm(layout, side, triangle,
+              static_cast<int>(m), static_cast<int>(n),
+              alpha_array,
+              reinterpret_cast<const float*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+              reinterpret_cast<const float*>(&b_buffer[b_offset]), static_cast<int>(b_ld),
+              beta_array,
+              reinterpret_cast<float*>(&c_buffer[c_offset]), static_cast<int>(c_ld));
+}
+// Double-precision complex overload: forwards to cblas_zhemm, addressing A, B and C from their
+// element offsets. size_t sizes and leading dimensions are converted to int explicitly.
+void cblasXhemm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPLO triangle,
+                const size_t m, const size_t n,
+                const double2 alpha,
+                const std::vector<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
+                const std::vector<double2>& b_buffer, const size_t b_offset, const size_t b_ld,
+                const double2 beta,
+                std::vector<double2>& c_buffer, const size_t c_offset, const size_t c_ld) {
+  // Complex scalars are passed as {real, imag} pairs; stack arrays avoid heap allocations per call
+  const double alpha_array[2] = {alpha.real(), alpha.imag()};
+  const double beta_array[2] = {beta.real(), beta.imag()};
+  cblas_zhemm(layout, side, triangle,
+              static_cast<int>(m), static_cast<int>(n),
+              alpha_array,
+              reinterpret_cast<const double*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+              reinterpret_cast<const double*>(&b_buffer[b_offset]), static_cast<int>(b_ld),
+              beta_array,
+              reinterpret_cast<double*>(&c_buffer[c_offset]), static_cast<int>(c_ld));
+}
+
+// Forwards the Netlib BLAS calls for SSYRK/DSYRK/CSYRK/ZSYRK
+// Single-precision overload: forwards to cblas_ssyrk, addressing A and C from their element
+// offsets. size_t sizes and leading dimensions are converted to int explicitly, not implicitly.
+void cblasXsyrk(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose,
+                const size_t n, const size_t k,
+                const float alpha,
+                const std::vector<float>& a_buffer, const size_t a_offset, const size_t a_ld,
+                const float beta,
+                std::vector<float>& c_buffer, const size_t c_offset, const size_t c_ld) {
+  cblas_ssyrk(layout, triangle, a_transpose,
+              static_cast<int>(n), static_cast<int>(k),
+              alpha,
+              &a_buffer[a_offset], static_cast<int>(a_ld),
+              beta,
+              &c_buffer[c_offset], static_cast<int>(c_ld));
+}
+// Double-precision overload: forwards to cblas_dsyrk, addressing A and C from their element
+// offsets. size_t sizes and leading dimensions are converted to int explicitly, not implicitly.
+void cblasXsyrk(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose,
+                const size_t n, const size_t k,
+                const double alpha,
+                const std::vector<double>& a_buffer, const size_t a_offset, const size_t a_ld,
+                const double beta,
+                std::vector<double>& c_buffer, const size_t c_offset, const size_t c_ld) {
+  cblas_dsyrk(layout, triangle, a_transpose,
+              static_cast<int>(n), static_cast<int>(k),
+              alpha,
+              &a_buffer[a_offset], static_cast<int>(a_ld),
+              beta,
+              &c_buffer[c_offset], static_cast<int>(c_ld));
+}
+// Single-precision complex overload: forwards to cblas_csyrk, addressing A and C from their
+// element offsets. size_t sizes and leading dimensions are converted to int explicitly.
+void cblasXsyrk(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose,
+                const size_t n, const size_t k,
+                const float2 alpha,
+                const std::vector<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
+                const float2 beta,
+                std::vector<float2>& c_buffer, const size_t c_offset, const size_t c_ld) {
+  // Complex scalars are passed as {real, imag} pairs; stack arrays avoid heap allocations per call
+  const float alpha_array[2] = {alpha.real(), alpha.imag()};
+  const float beta_array[2] = {beta.real(), beta.imag()};
+  cblas_csyrk(layout, triangle, a_transpose,
+              static_cast<int>(n), static_cast<int>(k),
+              alpha_array,
+              reinterpret_cast<const float*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+              beta_array,
+              reinterpret_cast<float*>(&c_buffer[c_offset]), static_cast<int>(c_ld));
+}
+// Double-precision complex overload: forwards to cblas_zsyrk, addressing A and C from their
+// element offsets. size_t sizes and leading dimensions are converted to int explicitly.
+void cblasXsyrk(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose,
+                const size_t n, const size_t k,
+                const double2 alpha,
+                const std::vector<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
+                const double2 beta,
+                std::vector<double2>& c_buffer, const size_t c_offset, const size_t c_ld) {
+  // Complex scalars are passed as {real, imag} pairs; stack arrays avoid heap allocations per call
+  const double alpha_array[2] = {alpha.real(), alpha.imag()};
+  const double beta_array[2] = {beta.real(), beta.imag()};
+  cblas_zsyrk(layout, triangle, a_transpose,
+              static_cast<int>(n), static_cast<int>(k),
+              alpha_array,
+              reinterpret_cast<const double*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+              beta_array,
+              reinterpret_cast<double*>(&c_buffer[c_offset]), static_cast<int>(c_ld));
+}
+
+// Forwards the Netlib BLAS calls for CHERK/ZHERK
+// Single-precision overload: forwards to cblas_cherk with real alpha/beta, reinterpreting the
+// float2 buffers as float pointers. size_t sizes and leading dimensions are cast to int explicitly.
+void cblasXherk(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose,
+                const size_t n, const size_t k,
+                const float alpha,
+                const std::vector<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
+                const float beta,
+                std::vector<float2>& c_buffer, const size_t c_offset, const size_t c_ld) {
+  cblas_cherk(layout, triangle, a_transpose,
+              static_cast<int>(n), static_cast<int>(k),
+              alpha,
+              reinterpret_cast<const float*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+              beta,
+              reinterpret_cast<float*>(&c_buffer[c_offset]), static_cast<int>(c_ld));
+}
+// Double-precision overload: forwards to cblas_zherk with real alpha/beta, reinterpreting the
+// double2 buffers as double pointers. size_t sizes and leading dimensions are cast to int explicitly.
+void cblasXherk(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose,
+                const size_t n, const size_t k,
+                const double alpha,
+                const std::vector<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
+                const double beta,
+                std::vector<double2>& c_buffer, const size_t c_offset, const size_t c_ld) {
+  cblas_zherk(layout, triangle, a_transpose,
+              static_cast<int>(n), static_cast<int>(k),
+              alpha,
+              reinterpret_cast<const double*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+              beta,
+              reinterpret_cast<double*>(&c_buffer[c_offset]), static_cast<int>(c_ld));
+}
+
+// Forwards the Netlib BLAS calls for SSYR2K/DSYR2K/CSYR2K/ZSYR2K
+// Single-precision overload: forwards to cblas_ssyr2k, addressing A, B and C from their element
+// offsets. size_t sizes and leading dimensions are converted to int explicitly, not implicitly.
+void cblasXsyr2k(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE ab_transpose,
+                 const size_t n, const size_t k,
+                 const float alpha,
+                 const std::vector<float>& a_buffer, const size_t a_offset, const size_t a_ld,
+                 const std::vector<float>& b_buffer, const size_t b_offset, const size_t b_ld,
+                 const float beta,
+                 std::vector<float>& c_buffer, const size_t c_offset, const size_t c_ld) {
+  cblas_ssyr2k(layout, triangle, ab_transpose,
+               static_cast<int>(n), static_cast<int>(k),
+               alpha,
+               &a_buffer[a_offset], static_cast<int>(a_ld),
+               &b_buffer[b_offset], static_cast<int>(b_ld),
+               beta,
+               &c_buffer[c_offset], static_cast<int>(c_ld));
+}
+// Double-precision overload: forwards to cblas_dsyr2k, addressing A, B and C from their element
+// offsets. size_t sizes and leading dimensions are converted to int explicitly, not implicitly.
+void cblasXsyr2k(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE ab_transpose,
+                 const size_t n, const size_t k,
+                 const double alpha,
+                 const std::vector<double>& a_buffer, const size_t a_offset, const size_t a_ld,
+                 const std::vector<double>& b_buffer, const size_t b_offset, const size_t b_ld,
+                 const double beta,
+                 std::vector<double>& c_buffer, const size_t c_offset, const size_t c_ld) {
+  cblas_dsyr2k(layout, triangle, ab_transpose,
+               static_cast<int>(n), static_cast<int>(k),
+               alpha,
+               &a_buffer[a_offset], static_cast<int>(a_ld),
+               &b_buffer[b_offset], static_cast<int>(b_ld),
+               beta,
+               &c_buffer[c_offset], static_cast<int>(c_ld));
+}
+// Single-precision complex overload: forwards to cblas_csyr2k, addressing A, B and C from their
+// element offsets. size_t sizes and leading dimensions are converted to int explicitly.
+void cblasXsyr2k(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE ab_transpose,
+                 const size_t n, const size_t k,
+                 const float2 alpha,
+                 const std::vector<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
+                 const std::vector<float2>& b_buffer, const size_t b_offset, const size_t b_ld,
+                 const float2 beta,
+                 std::vector<float2>& c_buffer, const size_t c_offset, const size_t c_ld) {
+  // Complex scalars are passed as {real, imag} pairs; stack arrays avoid heap allocations per call
+  const float alpha_array[2] = {alpha.real(), alpha.imag()};
+  const float beta_array[2] = {beta.real(), beta.imag()};
+  cblas_csyr2k(layout, triangle, ab_transpose,
+               static_cast<int>(n), static_cast<int>(k),
+               alpha_array,
+               reinterpret_cast<const float*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+               reinterpret_cast<const float*>(&b_buffer[b_offset]), static_cast<int>(b_ld),
+               beta_array,
+               reinterpret_cast<float*>(&c_buffer[c_offset]), static_cast<int>(c_ld));
+}
+// Double-precision complex overload: forwards to cblas_zsyr2k, addressing A, B and C from their
+// element offsets. size_t sizes and leading dimensions are converted to int explicitly.
+void cblasXsyr2k(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE ab_transpose,
+                 const size_t n, const size_t k,
+                 const double2 alpha,
+                 const std::vector<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
+                 const std::vector<double2>& b_buffer, const size_t b_offset, const size_t b_ld,
+                 const double2 beta,
+                 std::vector<double2>& c_buffer, const size_t c_offset, const size_t c_ld) {
+  // Complex scalars are passed as {real, imag} pairs; stack arrays avoid heap allocations per call
+  const double alpha_array[2] = {alpha.real(), alpha.imag()};
+  const double beta_array[2] = {beta.real(), beta.imag()};
+  cblas_zsyr2k(layout, triangle, ab_transpose,
+               static_cast<int>(n), static_cast<int>(k),
+               alpha_array,
+               reinterpret_cast<const double*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+               reinterpret_cast<const double*>(&b_buffer[b_offset]), static_cast<int>(b_ld),
+               beta_array,
+               reinterpret_cast<double*>(&c_buffer[c_offset]), static_cast<int>(c_ld));
+}
+
+// Forwards the Netlib BLAS calls for CHER2K/ZHER2K
+// Single-precision overload: forwards to cblas_cher2k with a complex alpha and a real beta,
+// addressing A, B and C from their element offsets. size_t sizes/strides are cast to int explicitly.
+void cblasXher2k(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE ab_transpose,
+                 const size_t n, const size_t k,
+                 const float2 alpha,
+                 const std::vector<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
+                 const std::vector<float2>& b_buffer, const size_t b_offset, const size_t b_ld,
+                 const float beta,
+                 std::vector<float2>& c_buffer, const size_t c_offset, const size_t c_ld) {
+  // Complex scalar is passed as a {real, imag} pair; a stack array avoids a heap allocation per call
+  const float alpha_array[2] = {alpha.real(), alpha.imag()};
+  cblas_cher2k(layout, triangle, ab_transpose,
+               static_cast<int>(n), static_cast<int>(k),
+               alpha_array,
+               reinterpret_cast<const float*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+               reinterpret_cast<const float*>(&b_buffer[b_offset]), static_cast<int>(b_ld),
+               beta,
+               reinterpret_cast<float*>(&c_buffer[c_offset]), static_cast<int>(c_ld));
+}
+// Double-precision overload: forwards to cblas_zher2k with a complex alpha and a real beta,
+// addressing A, B and C from their element offsets. size_t sizes/strides are cast to int explicitly.
+void cblasXher2k(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE ab_transpose,
+                 const size_t n, const size_t k,
+                 const double2 alpha,
+                 const std::vector<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
+                 const std::vector<double2>& b_buffer, const size_t b_offset, const size_t b_ld,
+                 const double beta,
+                 std::vector<double2>& c_buffer, const size_t c_offset, const size_t c_ld) {
+  // Complex scalar is passed as a {real, imag} pair; a stack array avoids a heap allocation per call
+  const double alpha_array[2] = {alpha.real(), alpha.imag()};
+  cblas_zher2k(layout, triangle, ab_transpose,
+               static_cast<int>(n), static_cast<int>(k),
+               alpha_array,
+               reinterpret_cast<const double*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+               reinterpret_cast<const double*>(&b_buffer[b_offset]), static_cast<int>(b_ld),
+               beta,
+               reinterpret_cast<double*>(&c_buffer[c_offset]), static_cast<int>(c_ld));
+}
+
+// Forwards the Netlib BLAS calls for STRMM/DTRMM/CTRMM/ZTRMM
+// Single-precision overload: forwards to cblas_strmm, addressing A and B from their element
+// offsets. size_t sizes and leading dimensions are converted to int explicitly, not implicitly.
+void cblasXtrmm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
+                const size_t m, const size_t n,
+                const float alpha,
+                const std::vector<float>& a_buffer, const size_t a_offset, const size_t a_ld,
+                std::vector<float>& b_buffer, const size_t b_offset, const size_t b_ld) {
+  cblas_strmm(layout, side, triangle, a_transpose, diagonal,
+              static_cast<int>(m), static_cast<int>(n),
+              alpha,
+              &a_buffer[a_offset], static_cast<int>(a_ld),
+              &b_buffer[b_offset], static_cast<int>(b_ld));
+}
+// Double-precision overload: forwards to cblas_dtrmm, addressing A and B from their element
+// offsets. size_t sizes and leading dimensions are converted to int explicitly, not implicitly.
+void cblasXtrmm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
+                const size_t m, const size_t n,
+                const double alpha,
+                const std::vector<double>& a_buffer, const size_t a_offset, const size_t a_ld,
+                std::vector<double>& b_buffer, const size_t b_offset, const size_t b_ld) {
+  cblas_dtrmm(layout, side, triangle, a_transpose, diagonal,
+              static_cast<int>(m), static_cast<int>(n),
+              alpha,
+              &a_buffer[a_offset], static_cast<int>(a_ld),
+              &b_buffer[b_offset], static_cast<int>(b_ld));
+}
+// Single-precision complex overload: forwards to cblas_ctrmm, addressing A and B from their
+// element offsets. size_t sizes and leading dimensions are converted to int explicitly.
+void cblasXtrmm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
+                const size_t m, const size_t n,
+                const float2 alpha,
+                const std::vector<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
+                std::vector<float2>& b_buffer, const size_t b_offset, const size_t b_ld) {
+  // Complex scalar is passed as a {real, imag} pair; a stack array avoids a heap allocation per call
+  const float alpha_array[2] = {alpha.real(), alpha.imag()};
+  cblas_ctrmm(layout, side, triangle, a_transpose, diagonal,
+              static_cast<int>(m), static_cast<int>(n),
+              alpha_array,
+              reinterpret_cast<const float*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+              reinterpret_cast<float*>(&b_buffer[b_offset]), static_cast<int>(b_ld));
+}
+// Double-precision complex overload: forwards to cblas_ztrmm, addressing A and B from their
+// element offsets. size_t sizes and leading dimensions are converted to int explicitly.
+void cblasXtrmm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
+                const size_t m, const size_t n,
+                const double2 alpha,
+                const std::vector<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
+                std::vector<double2>& b_buffer, const size_t b_offset, const size_t b_ld) {
+  // Complex scalar is passed as a {real, imag} pair; a stack array avoids a heap allocation per call
+  const double alpha_array[2] = {alpha.real(), alpha.imag()};
+  cblas_ztrmm(layout, side, triangle, a_transpose, diagonal,
+              static_cast<int>(m), static_cast<int>(n),
+              alpha_array,
+              reinterpret_cast<const double*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+              reinterpret_cast<double*>(&b_buffer[b_offset]), static_cast<int>(b_ld));
+}
+
+// Forwards the Netlib BLAS calls for STRSM/DTRSM/CTRSM/ZTRSM
+// Single-precision overload: forwards to cblas_strsm, addressing A and B from their element
+// offsets. size_t sizes and leading dimensions are converted to int explicitly, not implicitly.
+void cblasXtrsm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
+                const size_t m, const size_t n,
+                const float alpha,
+                const std::vector<float>& a_buffer, const size_t a_offset, const size_t a_ld,
+                std::vector<float>& b_buffer, const size_t b_offset, const size_t b_ld) {
+  cblas_strsm(layout, side, triangle, a_transpose, diagonal,
+              static_cast<int>(m), static_cast<int>(n),
+              alpha,
+              &a_buffer[a_offset], static_cast<int>(a_ld),
+              &b_buffer[b_offset], static_cast<int>(b_ld));
+}
+// Double-precision overload: forwards to cblas_dtrsm, addressing A and B from their element
+// offsets. size_t sizes and leading dimensions are converted to int explicitly, not implicitly.
+void cblasXtrsm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
+                const size_t m, const size_t n,
+                const double alpha,
+                const std::vector<double>& a_buffer, const size_t a_offset, const size_t a_ld,
+                std::vector<double>& b_buffer, const size_t b_offset, const size_t b_ld) {
+  cblas_dtrsm(layout, side, triangle, a_transpose, diagonal,
+              static_cast<int>(m), static_cast<int>(n),
+              alpha,
+              &a_buffer[a_offset], static_cast<int>(a_ld),
+              &b_buffer[b_offset], static_cast<int>(b_ld));
+}
+// Single-precision complex overload: forwards to cblas_ctrsm, addressing A and B from their
+// element offsets. size_t sizes and leading dimensions are converted to int explicitly.
+void cblasXtrsm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
+                const size_t m, const size_t n,
+                const float2 alpha,
+                const std::vector<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
+                std::vector<float2>& b_buffer, const size_t b_offset, const size_t b_ld) {
+  // Complex scalar is passed as a {real, imag} pair; a stack array avoids a heap allocation per call
+  const float alpha_array[2] = {alpha.real(), alpha.imag()};
+  cblas_ctrsm(layout, side, triangle, a_transpose, diagonal,
+              static_cast<int>(m), static_cast<int>(n),
+              alpha_array,
+              reinterpret_cast<const float*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+              reinterpret_cast<float*>(&b_buffer[b_offset]), static_cast<int>(b_ld));
+}
+// Double-precision complex overload: forwards to cblas_ztrsm, addressing A and B from their
+// element offsets. size_t sizes and leading dimensions are converted to int explicitly.
+void cblasXtrsm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
+                const size_t m, const size_t n,
+                const double2 alpha,
+                const std::vector<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
+                std::vector<double2>& b_buffer, const size_t b_offset, const size_t b_ld) {
+  // Complex scalar is passed as a {real, imag} pair; a stack array avoids a heap allocation per call
+  const double alpha_array[2] = {alpha.real(), alpha.imag()};
+  cblas_ztrsm(layout, side, triangle, a_transpose, diagonal,
+              static_cast<int>(m), static_cast<int>(n),
+              alpha_array,
+              reinterpret_cast<const double*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+              reinterpret_cast<double*>(&b_buffer[b_offset]), static_cast<int>(b_ld));
+}
+
+// =================================================================================================
+} // namespace clblast
+
+// CLBLAST_TEST_WRAPPER_CBLAS_H_
+#endif
diff --git a/test/wrapper_clblas.h b/test/wrapper_clblas.h
index fb6e83aa..89b708b8 100644
--- a/test/wrapper_clblas.h
+++ b/test/wrapper_clblas.h
@@ -65,7 +65,7 @@ template <typename T>
clblasStatus clblasXrotmg(cl_mem sd1_buffer, const size_t sd1_offset,
cl_mem sd2_buffer, const size_t sd2_offset,
cl_mem sx1_buffer, const size_t sx1_offset,
- cl_mem sy1_buffer, const size_t sy1_offset,
+ const cl_mem sy1_buffer, const size_t sy1_offset,
cl_mem sparam_buffer, const size_t sparam_offset,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events);
@@ -73,7 +73,7 @@ template <>
clblasStatus clblasXrotmg<float>(cl_mem sd1_buffer, const size_t sd1_offset,
cl_mem sd2_buffer, const size_t sd2_offset,
cl_mem sx1_buffer, const size_t sx1_offset,
- cl_mem sy1_buffer, const size_t sy1_offset,
+ const cl_mem sy1_buffer, const size_t sy1_offset,
cl_mem sparam_buffer, const size_t sparam_offset,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
@@ -88,7 +88,7 @@ template <>
clblasStatus clblasXrotmg<double>(cl_mem sd1_buffer, const size_t sd1_offset,
cl_mem sd2_buffer, const size_t sd2_offset,
cl_mem sx1_buffer, const size_t sx1_offset,
- cl_mem sy1_buffer, const size_t sy1_offset,
+ const cl_mem sy1_buffer, const size_t sy1_offset,
cl_mem sparam_buffer, const size_t sparam_offset,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {