diff options
Diffstat (limited to 'scripts')
-rw-r--r-- | scripts/generator/generator.py | 68 | ||||
-rw-r--r-- | scripts/generator/routine.py | 100 |
2 files changed, 113 insertions, 55 deletions
diff --git a/scripts/generator/generator.py b/scripts/generator/generator.py index 699cd9cf..9c9675b8 100644 --- a/scripts/generator/generator.py +++ b/scripts/generator/generator.py @@ -48,29 +48,55 @@ TU = DataType("typename T, typename U", "T,U", ["T", "U", "T", "U"], "T") # for # Populates a list of routines routines = [ -[ # Level 1 - Routine(True, 1, "swap", T, [S,D,C,Z], ["n"], [], [], ["x","y"], [], False, "Swap two vectors"), - Routine(True, 1, "scal", T, [S,D,C,Z], ["n"], [], [], ["x"], ["alpha"], False, "Vector scaling"), - Routine(True, 1, "copy", T, [S,D,C,Z], ["n"], [], ["x"], ["y"], [], False, "Vector copy"), - Routine(True, 1, "axpy", T, [S,D,C,Z], ["n"], [], ["x"], ["y"], ["alpha"], False, "Vector-times-constant plus vector"), - Routine(True, 1, "dot", T, [S,D], ["n"], [], ["x","y"], ["dot"], [], True, "Dot product of two vectors"), - Routine(True, 1, "dotu", T, [C,Z], ["n"], [], ["x","y"], ["dot"], [], True, "Dot product of two complex vectors"), - Routine(True, 1, "dotc", T, [C,Z], ["n"], [], ["x","y"], ["dot"], [], True, "Dot product of two complex vectors, one conjugated"), +[ # Level 1: vector-vector + #Routine(False, "1", "rotg", T, [S,D], [], [], [], [], ["a","b","c","s"], False, "Generate plane rotation"), + #Routine(False, "1", "rot", T, [S,D], ["n"], [], [], ["x","y"], ["c","s"], False, "Apply plane rotation"), + Routine(True, "1", "swap", T, [S,D,C,Z], ["n"], [], [], ["x","y"], [], False, "Swap two vectors"), + Routine(True, "1", "scal", T, [S,D,C,Z], ["n"], [], [], ["x"], ["alpha"], False, "Vector scaling"), + Routine(True, "1", "copy", T, [S,D,C,Z], ["n"], [], ["x"], ["y"], [], False, "Vector copy"), + Routine(True, "1", "axpy", T, [S,D,C,Z], ["n"], [], ["x"], ["y"], ["alpha"], False, "Vector-times-constant plus vector"), + Routine(True, "1", "dot", T, [S,D], ["n"], [], ["x","y"], ["dot"], [], True, "Dot product of two vectors"), + Routine(True, "1", "dotu", T, [C,Z], ["n"], [], ["x","y"], ["dot"], [], True, "Dot product of two complex vectors"), + Routine(True, "1", "dotc", T, [C,Z], ["n"], [], ["x","y"], ["dot"], [], True, "Dot product of two complex vectors, one conjugated"), ], -[ # Level 2 - Routine(True, 2, "gemv", T, [S,D,C,Z], ["m","n"], ["layout","a_transpose"], ["a","x"], ["y"], ["alpha","beta"], False, "Generalized matrix-vector multiplication"), - Routine(True, 2, "hemv", T, [C,Z], ["n"], ["layout","triangle"], ["a","x"], ["y"], ["alpha","beta"], False, "Hermitian matrix-vector multiplication"), - Routine(True, 2, "symv", T, [S,D], ["n"], ["layout","triangle"], ["a","x"], ["y"], ["alpha","beta"], False, "Symmetric matrix-vector multiplication"), +[ # Level 2: matrix-vector + Routine(True, "2a", "gemv", T, [S,D,C,Z], ["m","n"], ["layout","a_transpose"], ["a","x"], ["y"], ["alpha","beta"], False, "General matrix-vector multiplication"), + Routine(False, "2a", "gbmv", T, [S,D,C,Z], ["m","n","kl","ku"], ["layout","a_transpose"], ["a","x"], ["y"], ["alpha","beta"], False, "General banded matrix-vector multiplication"), + Routine(True, "2a", "hemv", T, [C,Z], ["n"], ["layout","triangle"], ["a","x"], ["y"], ["alpha","beta"], False, "Hermitian matrix-vector multiplication"), + Routine(False, "2a", "hbmv", T, [C,Z], ["n","k"], ["layout","triangle"], ["a","x"], ["y"], ["alpha","beta"], False, "Hermitian banded matrix-vector multiplication"), + Routine(False, "2a", "hpmv", T, [C,Z], ["n"], ["layout","triangle"], ["ap","x"], ["y"], ["alpha","beta"], False, "Hermitian packed matrix-vector multiplication"), + Routine(True, "2a", "symv", T, [S,D], ["n"], ["layout","triangle"], ["a","x"], ["y"], ["alpha","beta"], False, "Symmetric matrix-vector multiplication"), + Routine(False, "2a", "sbmv", T, [S,D], ["n","k"], ["layout","triangle"], ["a","x"], ["y"], ["alpha","beta"], False, "Symmetric banded matrix-vector multiplication"), + Routine(False, "2a", "spmv", T, [S,D], ["n"], ["layout","triangle"], ["ap","x"], ["y"], ["alpha","beta"], False, "Symmetric packed matrix-vector multiplication"), + Routine(False, "2a", "trmv", T, [S,D,C,Z], ["n"], ["layout","triangle","a_transpose","diagonal"], ["a"], ["x"], [], True, "Triangular matrix-vector multiplication"), + Routine(False, "2a", "tbmv", T, [S,D,C,Z], ["n","k"], ["layout","triangle","a_transpose","diagonal"], ["a"], ["x"], [], True, "Triangular banded matrix-vector multiplication"), + Routine(False, "2a", "tpmv", T, [S,D,C,Z], ["n"], ["layout","triangle","a_transpose","diagonal"], ["ap"], ["x"], [], True, "Triangular packed matrix-vector multiplication"), + Routine(False, "2a", "trsv", T, [S,D,C,Z], ["n"], ["layout","triangle","a_transpose","diagonal"], ["a"], ["x"], [], False, "Solves a triangular system of equations"), + Routine(False, "2a", "tbsv", T, [S,D,C,Z], ["n","k"], ["layout","triangle","a_transpose","diagonal"], ["a"], ["x"], [], False, "Solves a banded triangular system of equations"), + Routine(False, "2a", "tpsv", T, [S,D,C,Z], ["n"], ["layout","triangle","a_transpose","diagonal"], ["ap"], ["x"], [], False, "Solves a packed triangular system of equations"), + # Level 2: matrix update + Routine(False, "2b", "ger", T, [S,D], ["m","n"], ["layout"], ["x","y"], ["a"], ["alpha"], False, "General rank-1 matrix update"), + Routine(False, "2b", "geru", T, [C,Z], ["m","n"], ["layout"], ["x","y"], ["a"], ["alpha"], False, "General rank-1 complex matrix update"), + Routine(False, "2b", "gerc", T, [C,Z], ["m","n"], ["layout"], ["x","y"], ["a"], ["alpha"], False, "General rank-1 complex conjugated matrix update"), + Routine(False, "2b", "her", Tc, [Css,Zdd], ["n"], ["layout","triangle"], ["x"], ["a"], ["alpha"], False, "Hermitian rank-1 matrix update"), + Routine(False, "2b", "hpr", Tc, [Css,Zdd], ["n"], ["layout","triangle"], ["x"], ["ap"], ["alpha"], False, "Hermitian packed rank-1 matrix update"), + Routine(False, "2b", "her2", T, [C,Z], ["n"], ["layout","triangle"], ["x","y"], ["a"], ["alpha"], False, "Hermitian rank-2 matrix update"), + Routine(False, "2b", "hpr2", T, [C,Z], ["n"], ["layout","triangle"], ["x","y"], ["ap"], ["alpha"], False, "Hermitian packed rank-2 matrix update"), + Routine(False, "2b", "syr", T, [S,D], ["n"], ["layout","triangle"], ["x"], ["a"], ["alpha"], False, "Symmetric rank-1 matrix update"), + Routine(False, "2b", "spr", T, [S,D], ["n"], ["layout","triangle"], ["x"], ["ap"], ["alpha"], False, "Symmetric packed rank-1 matrix update"), + Routine(False, "2b", "syr2", T, [S,D], ["n"], ["layout","triangle"], ["x","y"], ["a"], ["alpha"], False, "Symmetric rank-2 matrix update"), + Routine(False, "2b", "spr2", T, [S,D], ["n"], ["layout","triangle"], ["x","y"], ["ap"], ["alpha"], False, "Symmetric packed rank-2 matrix update"), ], -[ # Level 3 - Routine(True, 3, "gemm", T, [S,D,C,Z], ["m","n","k"], ["layout","a_transpose","b_transpose"], ["a","b"], ["c"], ["alpha","beta"], False, "Generalized matrix-matrix multiplication"), - Routine(True, 3, "symm", T, [S,D,C,Z], ["m","n"], ["layout","side","triangle"], ["a","b"], ["c"], ["alpha","beta"], False, "Symmetric matrix-matrix multiplication"), - Routine(True, 3, "hemm", T, [C,Z], ["m","n"], ["layout","side","triangle"], ["a","b"], ["c"], ["alpha","beta"], False, "Hermitian matrix-matrix multiplication"), - Routine(True, 3, "syrk", T, [S,D,C,Z], ["n","k"], ["layout","triangle","a_transpose"], ["a"], ["c"], ["alpha","beta"], False, "Rank-K update of a symmetric matrix"), - Routine(True, 3, "herk", Tc, [Css,Zdd], ["n","k"], ["layout","triangle","a_transpose"], ["a"], ["c"], ["alpha","beta"], False, "Rank-K update of a hermitian matrix"), - Routine(True, 3, "syr2k", T, [S,D,C,Z], ["n","k"], ["layout","triangle","ab_transpose"], ["a","b"], ["c"], ["alpha","beta"], False, "Rank-2K update of a symmetric matrix"), - Routine(True, 3, "her2k", TU, [Ccs,Zzd], ["n","k"], ["layout","triangle","ab_transpose"], ["a","b"], ["c"], ["alpha","beta"], False, "Rank-2K update of a hermitian matrix"), - Routine(True, 3, "trmm", T, [S,D,C,Z], ["m","n"], ["layout","side","triangle","a_transpose","diagonal"], ["a"], ["b"], ["alpha"], False, "Triangular matrix-matrix multiplication"), +[ # Level 3: matrix-matrix + Routine(True, "3", "gemm", T, [S,D,C,Z], ["m","n","k"], ["layout","a_transpose","b_transpose"], ["a","b"], ["c"], ["alpha","beta"], False, "General matrix-matrix multiplication"), + Routine(True, "3", "symm", T, [S,D,C,Z], ["m","n"], ["layout","side","triangle"], ["a","b"], ["c"], ["alpha","beta"], False, "Symmetric matrix-matrix multiplication"), + Routine(True, "3", "hemm", T, [C,Z], ["m","n"], ["layout","side","triangle"], ["a","b"], ["c"], ["alpha","beta"], False, "Hermitian matrix-matrix multiplication"), + Routine(True, "3", "syrk", T, [S,D,C,Z], ["n","k"], ["layout","triangle","a_transpose"], ["a"], ["c"], ["alpha","beta"], False, "Rank-K update of a symmetric matrix"), + Routine(True, "3", "herk", Tc, [Css,Zdd], ["n","k"], ["layout","triangle","a_transpose"], ["a"], ["c"], ["alpha","beta"], False, "Rank-K update of a hermitian matrix"), + Routine(True, "3", "syr2k", T, [S,D,C,Z], ["n","k"], ["layout","triangle","ab_transpose"], ["a","b"], ["c"], ["alpha","beta"], False, "Rank-2K update of a symmetric matrix"), + Routine(True, "3", "her2k", TU, [Ccs,Zzd], ["n","k"], ["layout","triangle","ab_transpose"], ["a","b"], ["c"], ["alpha","beta"], False, "Rank-2K update of a hermitian matrix"), + Routine(True, "3", "trmm", T, [S,D,C,Z], ["m","n"], ["layout","side","triangle","a_transpose","diagonal"], ["a"], ["b"], ["alpha"], False, "Triangular matrix-matrix multiplication"), + Routine(False, "3", "trsm", T, [S,D,C,Z], ["m","n"], ["layout","side","triangle","a_transpose","diagonal"], ["a"], ["b"], ["alpha"], False, "Solves a triangular system of equations"), ]] # ================================================================================================== diff --git a/scripts/generator/routine.py b/scripts/generator/routine.py index b2c50e3d..df4dd019 100644 --- a/scripts/generator/routine.py +++ b/scripts/generator/routine.py @@ -12,6 +12,9 @@ # # ================================================================================================== +# System modules +from itertools import chain + # Translates an option name to a CLBlast data-type def OptionToCLBlast(x): return { @@ -36,6 +39,9 @@ def OptionToWrapper(x): 'diagonal': "clblasDiag", }[x] +# Buffers without 'ld' or 'inc' parameter +NO_LD_INC = ["dot","ap"] + # ================================================================================================== # Class holding routine-specific information (e.g. name, which arguments, which precisions) @@ -71,6 +77,16 @@ class Routine(): def ShortNames(self): return "/".join([f.name+self.name.upper() for f in self.flavours]) + # Determines which buffers go first (between alpha and beta) and which ones go after + def BuffersFirst(self): + if self.level == "2b": + return ["x","y"] + return ["ap","a","b","x"] + def BuffersSecond(self): + if self.level == "2b": + return ["ap","a","b","c"] + return ["y","c"] + # ============================================================================================== # Retrieves a variable name for a specific input/output vector/matrix (e.g. 'x') @@ -78,7 +94,7 @@ class Routine(): if (name in self.inputs) or (name in self.outputs): a = [name+"_buffer"] b = [name+"_offset"] - c = [name+"_"+self.Postfix(name)] if (name not in ["dot"]) else [] + c = [name+"_"+self.Postfix(name)] if (name not in NO_LD_INC) else [] return [", ".join(a+b+c)] return [] @@ -88,7 +104,7 @@ class Routine(): if (name in self.inputs) or (name in self.outputs): a = [prefix+"cl_mem "+name+"_buffer"] b = ["const size_t "+name+"_offset"] - c = ["const size_t "+name+"_"+self.Postfix(name)] if (name not in ["dot"]) else [] + c = ["const size_t "+name+"_"+self.Postfix(name)] if (name not in NO_LD_INC) else [] return [", ".join(a+b+c)] return [] @@ -97,7 +113,7 @@ class Routine(): if (name in self.inputs) or (name in self.outputs): a = ["Buffer<"+self.template.buffertype+">("+name+"_buffer)"] b = [name+"_offset"] - c = [name+"_"+self.Postfix(name)] if (name not in ["dot"]) else [] + c = [name+"_"+self.Postfix(name)] if (name not in NO_LD_INC) else [] return [", ".join(a+b+c)] return [] @@ -120,7 +136,7 @@ class Routine(): if (name in self.inputs) or (name in self.outputs): a = [prefix+"cl_mem"] b = ["const size_t"] - c = ["const size_t"] if (name not in ["dot"]) else [] + c = ["const size_t"] if (name not in NO_LD_INC) else [] return [", ".join(a+b+c)] return [] @@ -134,41 +150,45 @@ class Routine(): # Retrieves the use of a scalar (alpha/beta) def ScalarUse(self, name, flavour): - if ((name == "alpha") and (name in self.scalars)): - return [flavour.UseAlpha()] - elif ((name == "beta") and (name in self.scalars)): - return [flavour.UseBeta()] + if name in self.scalars: + if name == "alpha": + return [flavour.UseAlpha()] + elif name == "beta": + return [flavour.UseBeta()] + return [name] return [] # Retrieves the use of a scalar (alpha/beta) def ScalarUseWrapper(self, name, flavour): - if ((name == "alpha") and (name in self.scalars)): - return [flavour.UseAlphaCL()] - elif ((name == "beta") and (name in self.scalars)): - return [flavour.UseBetaCL()] + if name in self.scalars: + if name == "alpha": + return [flavour.UseAlphaCL()] + elif name == "beta": + return [flavour.UseBetaCL()] + return [name] return [] # Retrieves the definition of a scalar (alpha/beta) def ScalarDef(self, name, flavour): - if ((name == "alpha") and (name in self.scalars)): - return ["const "+flavour.alpha_cl+" "+name] - elif ((name == "beta") and (name in self.scalars)): + if name in self.scalars: + if name == "alpha": + return ["const "+flavour.alpha_cl+" "+name] return ["const "+flavour.beta_cl+" "+name] return [] # As above, but without 'cl_' prefix def ScalarDefPlain(self, name, flavour): - if ((name == "alpha") and (name in self.scalars)): - return ["const "+flavour.alpha_cpp+" "+name] - elif ((name == "beta") and (name in self.scalars)): + if name in self.scalars: + if name == "alpha": + return ["const "+flavour.alpha_cpp+" "+name] return ["const "+flavour.beta_cpp+" "+name] return [] # Retrieves the type of a scalar (alpha/beta) def ScalarType(self, name, flavour): - if ((name == "alpha") and (name in self.scalars)): - return ["const "+flavour.alpha_cpp] - elif ((name == "beta") and (name in self.scalars)): + if name in self.scalars: + if name == "alpha": + return ["const "+flavour.alpha_cpp] return ["const "+flavour.beta_cpp] return [] @@ -234,43 +254,55 @@ class Routine(): def ArgumentsCladuc(self, flavour, indent): return (self.Options() + self.Sizes() + self.BufferCladuc("dot") + self.Scalar("alpha") + - self.BufferCladuc("a") + self.BufferCladuc("b") + self.BufferCladuc("x") + - self.Scalar("beta") + self.BufferCladuc("y") + self.BufferCladuc("c")) + list(chain(*[self.BufferCladuc(b) for b in self.BuffersFirst()])) + + self.Scalar("beta") + + list(chain(*[self.BufferCladuc(b) for b in self.BuffersSecond()])) + + list(chain(*[self.Scalar(s) for s in ["d1","d2","a","b","c","s"]]))) # Retrieves a combination of all the argument names, with CLBlast casts def ArgumentsCast(self, flavour, indent): return (self.OptionsCast(indent) + self.Sizes() + self.Buffer("dot") + self.ScalarUse("alpha", flavour) + - self.Buffer("a") + self.Buffer("b") + self.Buffer("x") + - self.ScalarUse("beta", flavour) + self.Buffer("y") + self.Buffer("c")) + list(chain(*[self.Buffer(b) for b in self.BuffersFirst()])) + + self.ScalarUse("beta", flavour) + + list(chain(*[self.Buffer(b) for b in self.BuffersSecond()])) + + list(chain(*[self.ScalarUse(s, flavour) for s in ["d1","d2","a","b","c","s"]]))) # As above, but for the clBLAS wrapper def ArgumentsWrapper(self, flavour): return (self.Options() + self.Sizes() + self.BufferWrapper("dot") + self.ScalarUseWrapper("alpha", flavour) + - self.BufferWrapper("a") + self.BufferWrapper("b") + self.BufferWrapper("x") + - self.ScalarUseWrapper("beta", flavour) + self.BufferWrapper("y") + self.BufferWrapper("c")) + list(chain(*[self.BufferWrapper(b) for b in self.BuffersFirst()])) + + self.ScalarUseWrapper("beta", flavour) + + list(chain(*[self.BufferWrapper(b) for b in self.BuffersSecond()])) + + list(chain(*[self.ScalarUseWrapper(s, flavour) for s in ["d1","d2","a","b","c","s"]]))) # Retrieves a combination of all the argument definitions def ArgumentsDef(self, flavour): return (self.OptionsDef() + self.SizesDef() + self.BufferDef("dot") + self.ScalarDef("alpha", flavour) + - self.BufferDef("a") + self.BufferDef("b") + self.BufferDef("x") + - self.ScalarDef("beta", flavour) + self.BufferDef("y") + self.BufferDef("c")) + list(chain(*[self.BufferDef(b) for b in self.BuffersFirst()])) + + self.ScalarDef("beta", flavour) + + list(chain(*[self.BufferDef(b) for b in self.BuffersSecond()])) + + list(chain(*[self.ScalarDef(s, flavour) for s in ["d1","d2","a","b","c","s"]]))) # As above, but clBLAS wrapper plain datatypes def ArgumentsDefWrapper(self, flavour): return (self.OptionsDefWrapper() + self.SizesDef() + self.BufferDef("dot") + self.ScalarDefPlain("alpha", flavour) + - self.BufferDef("a") + self.BufferDef("b") + self.BufferDef("x") + - self.ScalarDefPlain("beta", flavour) + self.BufferDef("y") + self.BufferDef("c")) + list(chain(*[self.BufferDef(b) for b in self.BuffersFirst()])) + + self.ScalarDefPlain("beta", flavour) + + list(chain(*[self.BufferDef(b) for b in self.BuffersSecond()])) + + list(chain(*[self.ScalarDefPlain(s, flavour) for s in ["d1","d2","a","b","c","s"]]))) # Retrieves a combination of all the argument types def ArgumentsType(self, flavour): return (self.OptionsType() + self.SizesType() + self.BufferType("dot") + self.ScalarType("alpha", flavour) + - self.BufferType("a") + self.BufferType("b") + self.BufferType("x") + - self.ScalarType("beta", flavour) + self.BufferType("y") + self.BufferType("c")) + list(chain(*[self.BufferType(b) for b in self.BuffersFirst()])) + + self.ScalarType("beta", flavour) + + list(chain(*[self.BufferType(b) for b in self.BuffersSecond()])) + + list(chain(*[self.ScalarType(s, flavour) for s in ["d1","d2","a","b","c","s"]]))) # ============================================================================================== @@ -290,7 +322,7 @@ class Routine(): result = "template <"+self.template.name+">\n" result += "StatusCode "+self.name.capitalize()+"(" result += (",\n"+indent).join([a for a in self.ArgumentsType(self.template)]) - result += ",\n"+indent+"cl_command_queue* queue, cl_event* event)" + result += ",\n"+indent+"cl_command_queue*, cl_event*)" return result # As above, but now for C |