summary | refs | log | tree | commit | diff
path: root/scripts
diff options
context:
space:
mode:
Diffstat (limited to 'scripts')
-rw-r--r-- scripts/generator/generator.py 68
-rw-r--r-- scripts/generator/routine.py 100
2 files changed, 113 insertions, 55 deletions
diff --git a/scripts/generator/generator.py b/scripts/generator/generator.py
index 699cd9cf..9c9675b8 100644
--- a/scripts/generator/generator.py
+++ b/scripts/generator/generator.py
@@ -48,29 +48,55 @@ TU = DataType("typename T, typename U", "T,U", ["T", "U", "T", "U"], "T") # for
# Populates a list of routines
routines = [
-[ # Level 1
- Routine(True, 1, "swap", T, [S,D,C,Z], ["n"], [], [], ["x","y"], [], False, "Swap two vectors"),
- Routine(True, 1, "scal", T, [S,D,C,Z], ["n"], [], [], ["x"], ["alpha"], False, "Vector scaling"),
- Routine(True, 1, "copy", T, [S,D,C,Z], ["n"], [], ["x"], ["y"], [], False, "Vector copy"),
- Routine(True, 1, "axpy", T, [S,D,C,Z], ["n"], [], ["x"], ["y"], ["alpha"], False, "Vector-times-constant plus vector"),
- Routine(True, 1, "dot", T, [S,D], ["n"], [], ["x","y"], ["dot"], [], True, "Dot product of two vectors"),
- Routine(True, 1, "dotu", T, [C,Z], ["n"], [], ["x","y"], ["dot"], [], True, "Dot product of two complex vectors"),
- Routine(True, 1, "dotc", T, [C,Z], ["n"], [], ["x","y"], ["dot"], [], True, "Dot product of two complex vectors, one conjugated"),
+[ # Level 1: vector-vector
+ #Routine(False, "1", "rotg", T, [S,D], [], [], [], [], ["a","b","c","s"], False, "Generate plane rotation"),
+ #Routine(False, "1", "rot", T, [S,D], ["n"], [], [], ["x","y"], ["c","s"], False, "Apply plane rotation"),
+ Routine(True, "1", "swap", T, [S,D,C,Z], ["n"], [], [], ["x","y"], [], False, "Swap two vectors"),
+ Routine(True, "1", "scal", T, [S,D,C,Z], ["n"], [], [], ["x"], ["alpha"], False, "Vector scaling"),
+ Routine(True, "1", "copy", T, [S,D,C,Z], ["n"], [], ["x"], ["y"], [], False, "Vector copy"),
+ Routine(True, "1", "axpy", T, [S,D,C,Z], ["n"], [], ["x"], ["y"], ["alpha"], False, "Vector-times-constant plus vector"),
+ Routine(True, "1", "dot", T, [S,D], ["n"], [], ["x","y"], ["dot"], [], True, "Dot product of two vectors"),
+ Routine(True, "1", "dotu", T, [C,Z], ["n"], [], ["x","y"], ["dot"], [], True, "Dot product of two complex vectors"),
+ Routine(True, "1", "dotc", T, [C,Z], ["n"], [], ["x","y"], ["dot"], [], True, "Dot product of two complex vectors, one conjugated"),
],
-[ # Level 2
- Routine(True, 2, "gemv", T, [S,D,C,Z], ["m","n"], ["layout","a_transpose"], ["a","x"], ["y"], ["alpha","beta"], False, "Generalized matrix-vector multiplication"),
- Routine(True, 2, "hemv", T, [C,Z], ["n"], ["layout","triangle"], ["a","x"], ["y"], ["alpha","beta"], False, "Hermitian matrix-vector multiplication"),
- Routine(True, 2, "symv", T, [S,D], ["n"], ["layout","triangle"], ["a","x"], ["y"], ["alpha","beta"], False, "Symmetric matrix-vector multiplication"),
+[ # Level 2: matrix-vector
+ Routine(True, "2a", "gemv", T, [S,D,C,Z], ["m","n"], ["layout","a_transpose"], ["a","x"], ["y"], ["alpha","beta"], False, "General matrix-vector multiplication"),
+ Routine(False, "2a", "gbmv", T, [S,D,C,Z], ["m","n","kl","ku"], ["layout","a_transpose"], ["a","x"], ["y"], ["alpha","beta"], False, "General banded matrix-vector multiplication"),
+ Routine(True, "2a", "hemv", T, [C,Z], ["n"], ["layout","triangle"], ["a","x"], ["y"], ["alpha","beta"], False, "Hermitian matrix-vector multiplication"),
+ Routine(False, "2a", "hbmv", T, [C,Z], ["n","k"], ["layout","triangle"], ["a","x"], ["y"], ["alpha","beta"], False, "Hermitian banded matrix-vector multiplication"),
+ Routine(False, "2a", "hpmv", T, [C,Z], ["n"], ["layout","triangle"], ["ap","x"], ["y"], ["alpha","beta"], False, "Hermitian packed matrix-vector multiplication"),
+ Routine(True, "2a", "symv", T, [S,D], ["n"], ["layout","triangle"], ["a","x"], ["y"], ["alpha","beta"], False, "Symmetric matrix-vector multiplication"),
+ Routine(False, "2a", "sbmv", T, [S,D], ["n","k"], ["layout","triangle"], ["a","x"], ["y"], ["alpha","beta"], False, "Symmetric banded matrix-vector multiplication"),
+ Routine(False, "2a", "spmv", T, [S,D], ["n"], ["layout","triangle"], ["ap","x"], ["y"], ["alpha","beta"], False, "Symmetric packed matrix-vector multiplication"),
+ Routine(False, "2a", "trmv", T, [S,D,C,Z], ["n"], ["layout","triangle","a_transpose","diagonal"], ["a"], ["x"], [], True, "Triangular matrix-vector multiplication"),
+ Routine(False, "2a", "tbmv", T, [S,D,C,Z], ["n","k"], ["layout","triangle","a_transpose","diagonal"], ["a"], ["x"], [], True, "Triangular banded matrix-vector multiplication"),
+ Routine(False, "2a", "tpmv", T, [S,D,C,Z], ["n"], ["layout","triangle","a_transpose","diagonal"], ["ap"], ["x"], [], True, "Triangular packed matrix-vector multiplication"),
+ Routine(False, "2a", "trsv", T, [S,D,C,Z], ["n"], ["layout","triangle","a_transpose","diagonal"], ["a"], ["x"], [], False, "Solves a triangular system of equations"),
+ Routine(False, "2a", "tbsv", T, [S,D,C,Z], ["n","k"], ["layout","triangle","a_transpose","diagonal"], ["a"], ["x"], [], False, "Solves a banded triangular system of equations"),
+ Routine(False, "2a", "tpsv", T, [S,D,C,Z], ["n"], ["layout","triangle","a_transpose","diagonal"], ["ap"], ["x"], [], False, "Solves a packed triangular system of equations"),
+ # Level 2: matrix update
+ Routine(False, "2b", "ger", T, [S,D], ["m","n"], ["layout"], ["x","y"], ["a"], ["alpha"], False, "General rank-1 matrix update"),
+ Routine(False, "2b", "geru", T, [C,Z], ["m","n"], ["layout"], ["x","y"], ["a"], ["alpha"], False, "General rank-1 complex matrix update"),
+ Routine(False, "2b", "gerc", T, [C,Z], ["m","n"], ["layout"], ["x","y"], ["a"], ["alpha"], False, "General rank-1 complex conjugated matrix update"),
+ Routine(False, "2b", "her", Tc, [Css,Zdd], ["n"], ["layout","triangle"], ["x"], ["a"], ["alpha"], False, "Hermitian rank-1 matrix update"),
+ Routine(False, "2b", "hpr", Tc, [Css,Zdd], ["n"], ["layout","triangle"], ["x"], ["ap"], ["alpha"], False, "Hermitian packed rank-1 matrix update"),
+ Routine(False, "2b", "her2", T, [C,Z], ["n"], ["layout","triangle"], ["x","y"], ["a"], ["alpha"], False, "Hermitian rank-2 matrix update"),
+ Routine(False, "2b", "hpr2", T, [C,Z], ["n"], ["layout","triangle"], ["x","y"], ["ap"], ["alpha"], False, "Hermitian packed rank-2 matrix update"),
+ Routine(False, "2b", "syr", T, [S,D], ["n"], ["layout","triangle"], ["x"], ["a"], ["alpha"], False, "Symmetric rank-1 matrix update"),
+ Routine(False, "2b", "spr", T, [S,D], ["n"], ["layout","triangle"], ["x"], ["ap"], ["alpha"], False, "Symmetric packed rank-1 matrix update"),
+ Routine(False, "2b", "syr2", T, [S,D], ["n"], ["layout","triangle"], ["x","y"], ["a"], ["alpha"], False, "Symmetric rank-2 matrix update"),
+ Routine(False, "2b", "spr2", T, [S,D], ["n"], ["layout","triangle"], ["x","y"], ["ap"], ["alpha"], False, "Symmetric packed rank-2 matrix update"),
],
-[ # Level 3
- Routine(True, 3, "gemm", T, [S,D,C,Z], ["m","n","k"], ["layout","a_transpose","b_transpose"], ["a","b"], ["c"], ["alpha","beta"], False, "Generalized matrix-matrix multiplication"),
- Routine(True, 3, "symm", T, [S,D,C,Z], ["m","n"], ["layout","side","triangle"], ["a","b"], ["c"], ["alpha","beta"], False, "Symmetric matrix-matrix multiplication"),
- Routine(True, 3, "hemm", T, [C,Z], ["m","n"], ["layout","side","triangle"], ["a","b"], ["c"], ["alpha","beta"], False, "Hermitian matrix-matrix multiplication"),
- Routine(True, 3, "syrk", T, [S,D,C,Z], ["n","k"], ["layout","triangle","a_transpose"], ["a"], ["c"], ["alpha","beta"], False, "Rank-K update of a symmetric matrix"),
- Routine(True, 3, "herk", Tc, [Css,Zdd], ["n","k"], ["layout","triangle","a_transpose"], ["a"], ["c"], ["alpha","beta"], False, "Rank-K update of a hermitian matrix"),
- Routine(True, 3, "syr2k", T, [S,D,C,Z], ["n","k"], ["layout","triangle","ab_transpose"], ["a","b"], ["c"], ["alpha","beta"], False, "Rank-2K update of a symmetric matrix"),
- Routine(True, 3, "her2k", TU, [Ccs,Zzd], ["n","k"], ["layout","triangle","ab_transpose"], ["a","b"], ["c"], ["alpha","beta"], False, "Rank-2K update of a hermitian matrix"),
- Routine(True, 3, "trmm", T, [S,D,C,Z], ["m","n"], ["layout","side","triangle","a_transpose","diagonal"], ["a"], ["b"], ["alpha"], False, "Triangular matrix-matrix multiplication"),
+[ # Level 3: matrix-matrix
+ Routine(True, "3", "gemm", T, [S,D,C,Z], ["m","n","k"], ["layout","a_transpose","b_transpose"], ["a","b"], ["c"], ["alpha","beta"], False, "General matrix-matrix multiplication"),
+ Routine(True, "3", "symm", T, [S,D,C,Z], ["m","n"], ["layout","side","triangle"], ["a","b"], ["c"], ["alpha","beta"], False, "Symmetric matrix-matrix multiplication"),
+ Routine(True, "3", "hemm", T, [C,Z], ["m","n"], ["layout","side","triangle"], ["a","b"], ["c"], ["alpha","beta"], False, "Hermitian matrix-matrix multiplication"),
+ Routine(True, "3", "syrk", T, [S,D,C,Z], ["n","k"], ["layout","triangle","a_transpose"], ["a"], ["c"], ["alpha","beta"], False, "Rank-K update of a symmetric matrix"),
+ Routine(True, "3", "herk", Tc, [Css,Zdd], ["n","k"], ["layout","triangle","a_transpose"], ["a"], ["c"], ["alpha","beta"], False, "Rank-K update of a hermitian matrix"),
+ Routine(True, "3", "syr2k", T, [S,D,C,Z], ["n","k"], ["layout","triangle","ab_transpose"], ["a","b"], ["c"], ["alpha","beta"], False, "Rank-2K update of a symmetric matrix"),
+ Routine(True, "3", "her2k", TU, [Ccs,Zzd], ["n","k"], ["layout","triangle","ab_transpose"], ["a","b"], ["c"], ["alpha","beta"], False, "Rank-2K update of a hermitian matrix"),
+ Routine(True, "3", "trmm", T, [S,D,C,Z], ["m","n"], ["layout","side","triangle","a_transpose","diagonal"], ["a"], ["b"], ["alpha"], False, "Triangular matrix-matrix multiplication"),
+ Routine(False, "3", "trsm", T, [S,D,C,Z], ["m","n"], ["layout","side","triangle","a_transpose","diagonal"], ["a"], ["b"], ["alpha"], False, "Solves a triangular system of equations"),
]]
# ==================================================================================================
diff --git a/scripts/generator/routine.py b/scripts/generator/routine.py
index b2c50e3d..df4dd019 100644
--- a/scripts/generator/routine.py
+++ b/scripts/generator/routine.py
@@ -12,6 +12,9 @@
#
# ==================================================================================================
+# System modules
+from itertools import chain
+
# Translates an option name to a CLBlast data-type
def OptionToCLBlast(x):
return {
@@ -36,6 +39,9 @@ def OptionToWrapper(x):
'diagonal': "clblasDiag",
}[x]
+# Buffers without 'ld' or 'inc' parameter
+NO_LD_INC = ["dot","ap"]
+
# ==================================================================================================
# Class holding routine-specific information (e.g. name, which arguments, which precisions)
@@ -71,6 +77,16 @@ class Routine():
def ShortNames(self):
return "/".join([f.name+self.name.upper() for f in self.flavours])
+ # Determines which buffers go first (between alpha and beta) and which ones go after
+ def BuffersFirst(self):
+ if self.level == "2b":
+ return ["x","y"]
+ return ["ap","a","b","x"]
+ def BuffersSecond(self):
+ if self.level == "2b":
+ return ["ap","a","b","c"]
+ return ["y","c"]
+
# ==============================================================================================
# Retrieves a variable name for a specific input/output vector/matrix (e.g. 'x')
@@ -78,7 +94,7 @@ class Routine():
if (name in self.inputs) or (name in self.outputs):
a = [name+"_buffer"]
b = [name+"_offset"]
- c = [name+"_"+self.Postfix(name)] if (name not in ["dot"]) else []
+ c = [name+"_"+self.Postfix(name)] if (name not in NO_LD_INC) else []
return [", ".join(a+b+c)]
return []
@@ -88,7 +104,7 @@ class Routine():
if (name in self.inputs) or (name in self.outputs):
a = [prefix+"cl_mem "+name+"_buffer"]
b = ["const size_t "+name+"_offset"]
- c = ["const size_t "+name+"_"+self.Postfix(name)] if (name not in ["dot"]) else []
+ c = ["const size_t "+name+"_"+self.Postfix(name)] if (name not in NO_LD_INC) else []
return [", ".join(a+b+c)]
return []
@@ -97,7 +113,7 @@ class Routine():
if (name in self.inputs) or (name in self.outputs):
a = ["Buffer<"+self.template.buffertype+">("+name+"_buffer)"]
b = [name+"_offset"]
- c = [name+"_"+self.Postfix(name)] if (name not in ["dot"]) else []
+ c = [name+"_"+self.Postfix(name)] if (name not in NO_LD_INC) else []
return [", ".join(a+b+c)]
return []
@@ -120,7 +136,7 @@ class Routine():
if (name in self.inputs) or (name in self.outputs):
a = [prefix+"cl_mem"]
b = ["const size_t"]
- c = ["const size_t"] if (name not in ["dot"]) else []
+ c = ["const size_t"] if (name not in NO_LD_INC) else []
return [", ".join(a+b+c)]
return []
@@ -134,41 +150,45 @@ class Routine():
# Retrieves the use of a scalar: alpha/beta in their flavour-specific form, any other scalar by its plain name
def ScalarUse(self, name, flavour):
- if ((name == "alpha") and (name in self.scalars)):
- return [flavour.UseAlpha()]
- elif ((name == "beta") and (name in self.scalars)):
- return [flavour.UseBeta()]
+ if name in self.scalars:
+ if name == "alpha":
+ return [flavour.UseAlpha()]
+ elif name == "beta":
+ return [flavour.UseBeta()]
+ return [name]
return []
# Retrieves the use of a scalar for the clBLAS wrapper: alpha/beta in their flavour-specific CL form, any other scalar by its plain name
def ScalarUseWrapper(self, name, flavour):
- if ((name == "alpha") and (name in self.scalars)):
- return [flavour.UseAlphaCL()]
- elif ((name == "beta") and (name in self.scalars)):
- return [flavour.UseBetaCL()]
+ if name in self.scalars:
+ if name == "alpha":
+ return [flavour.UseAlphaCL()]
+ elif name == "beta":
+ return [flavour.UseBetaCL()]
+ return [name]
return []
# Retrieves the definition of a scalar: alpha uses the flavour's alpha type, every other scalar uses the beta type
def ScalarDef(self, name, flavour):
- if ((name == "alpha") and (name in self.scalars)):
- return ["const "+flavour.alpha_cl+" "+name]
- elif ((name == "beta") and (name in self.scalars)):
+ if name in self.scalars:
+ if name == "alpha":
+ return ["const "+flavour.alpha_cl+" "+name]
return ["const "+flavour.beta_cl+" "+name]
return []
# As above, but without 'cl_' prefix
def ScalarDefPlain(self, name, flavour):
- if ((name == "alpha") and (name in self.scalars)):
- return ["const "+flavour.alpha_cpp+" "+name]
- elif ((name == "beta") and (name in self.scalars)):
+ if name in self.scalars:
+ if name == "alpha":
+ return ["const "+flavour.alpha_cpp+" "+name]
return ["const "+flavour.beta_cpp+" "+name]
return []
# Retrieves the type of a scalar: alpha uses the flavour's alpha type, every other scalar uses the beta type
def ScalarType(self, name, flavour):
- if ((name == "alpha") and (name in self.scalars)):
- return ["const "+flavour.alpha_cpp]
- elif ((name == "beta") and (name in self.scalars)):
+ if name in self.scalars:
+ if name == "alpha":
+ return ["const "+flavour.alpha_cpp]
return ["const "+flavour.beta_cpp]
return []
@@ -234,43 +254,55 @@ class Routine():
def ArgumentsCladuc(self, flavour, indent):
return (self.Options() + self.Sizes() + self.BufferCladuc("dot") +
self.Scalar("alpha") +
- self.BufferCladuc("a") + self.BufferCladuc("b") + self.BufferCladuc("x") +
- self.Scalar("beta") + self.BufferCladuc("y") + self.BufferCladuc("c"))
+ list(chain(*[self.BufferCladuc(b) for b in self.BuffersFirst()])) +
+ self.Scalar("beta") +
+ list(chain(*[self.BufferCladuc(b) for b in self.BuffersSecond()])) +
+ list(chain(*[self.Scalar(s) for s in ["d1","d2","a","b","c","s"]])))
# Retrieves a combination of all the argument names, with CLBlast casts
def ArgumentsCast(self, flavour, indent):
return (self.OptionsCast(indent) + self.Sizes() + self.Buffer("dot") +
self.ScalarUse("alpha", flavour) +
- self.Buffer("a") + self.Buffer("b") + self.Buffer("x") +
- self.ScalarUse("beta", flavour) + self.Buffer("y") + self.Buffer("c"))
+ list(chain(*[self.Buffer(b) for b in self.BuffersFirst()])) +
+ self.ScalarUse("beta", flavour) +
+ list(chain(*[self.Buffer(b) for b in self.BuffersSecond()])) +
+ list(chain(*[self.ScalarUse(s, flavour) for s in ["d1","d2","a","b","c","s"]])))
# As above, but for the clBLAS wrapper
def ArgumentsWrapper(self, flavour):
return (self.Options() + self.Sizes() + self.BufferWrapper("dot") +
self.ScalarUseWrapper("alpha", flavour) +
- self.BufferWrapper("a") + self.BufferWrapper("b") + self.BufferWrapper("x") +
- self.ScalarUseWrapper("beta", flavour) + self.BufferWrapper("y") + self.BufferWrapper("c"))
+ list(chain(*[self.BufferWrapper(b) for b in self.BuffersFirst()])) +
+ self.ScalarUseWrapper("beta", flavour) +
+ list(chain(*[self.BufferWrapper(b) for b in self.BuffersSecond()])) +
+ list(chain(*[self.ScalarUseWrapper(s, flavour) for s in ["d1","d2","a","b","c","s"]])))
# Retrieves a combination of all the argument definitions
def ArgumentsDef(self, flavour):
return (self.OptionsDef() + self.SizesDef() + self.BufferDef("dot") +
self.ScalarDef("alpha", flavour) +
- self.BufferDef("a") + self.BufferDef("b") + self.BufferDef("x") +
- self.ScalarDef("beta", flavour) + self.BufferDef("y") + self.BufferDef("c"))
+ list(chain(*[self.BufferDef(b) for b in self.BuffersFirst()])) +
+ self.ScalarDef("beta", flavour) +
+ list(chain(*[self.BufferDef(b) for b in self.BuffersSecond()])) +
+ list(chain(*[self.ScalarDef(s, flavour) for s in ["d1","d2","a","b","c","s"]])))
# As above, but clBLAS wrapper plain datatypes
def ArgumentsDefWrapper(self, flavour):
return (self.OptionsDefWrapper() + self.SizesDef() + self.BufferDef("dot") +
self.ScalarDefPlain("alpha", flavour) +
- self.BufferDef("a") + self.BufferDef("b") + self.BufferDef("x") +
- self.ScalarDefPlain("beta", flavour) + self.BufferDef("y") + self.BufferDef("c"))
+ list(chain(*[self.BufferDef(b) for b in self.BuffersFirst()])) +
+ self.ScalarDefPlain("beta", flavour) +
+ list(chain(*[self.BufferDef(b) for b in self.BuffersSecond()])) +
+ list(chain(*[self.ScalarDefPlain(s, flavour) for s in ["d1","d2","a","b","c","s"]])))
# Retrieves a combination of all the argument types
def ArgumentsType(self, flavour):
return (self.OptionsType() + self.SizesType() + self.BufferType("dot") +
self.ScalarType("alpha", flavour) +
- self.BufferType("a") + self.BufferType("b") + self.BufferType("x") +
- self.ScalarType("beta", flavour) + self.BufferType("y") + self.BufferType("c"))
+ list(chain(*[self.BufferType(b) for b in self.BuffersFirst()])) +
+ self.ScalarType("beta", flavour) +
+ list(chain(*[self.BufferType(b) for b in self.BuffersSecond()])) +
+ list(chain(*[self.ScalarType(s, flavour) for s in ["d1","d2","a","b","c","s"]])))
# ==============================================================================================
@@ -290,7 +322,7 @@ class Routine():
result = "template <"+self.template.name+">\n"
result += "StatusCode "+self.name.capitalize()+"("
result += (",\n"+indent).join([a for a in self.ArgumentsType(self.template)])
- result += ",\n"+indent+"cl_command_queue* queue, cl_event* event)"
+ result += ",\n"+indent+"cl_command_queue*, cl_event*)"
return result
# As above, but now for C