From 5c83217cf256984573924e8f89c46f393a5fcfcd Mon Sep 17 00:00:00 2001 From: cnugteren Date: Fri, 1 Apr 2016 22:36:39 -0700 Subject: Added a wrapper for CBLAS libraries for performance/correctness testing --- scripts/generator/datatype.py | 5 ++ scripts/generator/generator.py | 53 +++++++++++++++++--- scripts/generator/routine.py | 109 ++++++++++++++++++++++++++++++++++++----- 3 files changed, 147 insertions(+), 20 deletions(-) (limited to 'scripts') diff --git a/scripts/generator/datatype.py b/scripts/generator/datatype.py index 9323bc4d..5a58ab53 100644 --- a/scripts/generator/datatype.py +++ b/scripts/generator/datatype.py @@ -58,5 +58,10 @@ class DataType(): return "<"+self.buffertype+","+self.beta_cpp+">, "+self.buffertype+", "+self.beta_cpp return "<"+self.buffertype+">, "+self.buffertype+", "+self.beta_cpp + # Current scalar is complex + def IsComplex(self, scalar): + return ((scalar == "alpha" and self.alpha_cpp in [FLT2, DBL2]) or + (scalar == "beta" and self.beta_cpp in [FLT2, DBL2])) + # ================================================================================================== diff --git a/scripts/generator/generator.py b/scripts/generator/generator.py index 6e2b2ed2..36a9bf40 100644 --- a/scripts/generator/generator.py +++ b/scripts/generator/generator.py @@ -8,12 +8,13 @@ # Cedric Nugteren # # This script automatically generates the bodies of the following files, creating the full CLBlast -# API interface and implementation (C, C++, and clBLAS wrapper): +# API interface and implementation (C, C++, and reference BLAS wrappers): # clblast.h # clblast.cc # clblast_c.h # clblast_c.cc # wrapper_clblas.h +# wrapper_cblas.h # It also generates the main functions for the correctness and performance tests as found in # test/correctness/routines/levelX/xYYYY.cc # test/performance/routines/levelX/xYYYY.cc @@ -55,7 +56,7 @@ TU = DataType("TU", "typename T, typename U", "T,U", ["T", "U", "T", "U"], "T") routines = [ [ # Level 1: vector-vector Routine(False, 
"1", "rotg", T, [S,D], [], [], [], ["sa","sb","sc","ss"], [], "", "Generate givens plane rotation"), - Routine(False, "1", "rotmg", T, [S,D], [], [], [], ["sd1","sd2","sx1","sy1","sparam"], [], "", "Generate modified givens plane rotation"), + Routine(False, "1", "rotmg", T, [S,D], [], [], ["sy1"], ["sd1","sd2","sx1","sparam"], [], "", "Generate modified givens plane rotation"), Routine(False, "1", "rot", T, [S,D], ["n"], [], [], ["x","y"], ["cos","sin"], "", "Apply givens plane rotation"), Routine(False, "1", "rotm", T, [S,D], ["n"], [], [], ["x","y","sparam"], [], "", "Apply modified givens plane rotation"), Routine(True, "1", "swap", T, [S,D,C,Z], ["n"], [], [], ["x","y"], [], "", "Swap two vectors"), @@ -220,11 +221,11 @@ def wrapper_clblas(routines): for routine in routines: result += "\n// Forwards the clBLAS calls for %s\n" % (routine.ShortNames()) if routine.NoScalars(): - result += routine.RoutineHeaderWrapper(routine.template, True, 21)+";\n" + result += routine.RoutineHeaderWrapperCL(routine.template, True, 21)+";\n" for flavour in routine.flavours: indent = " "*(17 + routine.Length()) - result += routine.RoutineHeaderWrapper(flavour, False, 21)+" {\n" - arguments = routine.ArgumentsWrapper(flavour) + result += routine.RoutineHeaderWrapperCL(flavour, False, 21)+" {\n" + arguments = routine.ArgumentsWrapperCL(flavour) if routine.scratch: result += " auto queue = Queue(queues[0]);\n" result += " auto context = queue.GetContext();\n" @@ -236,6 +237,41 @@ def wrapper_clblas(routines): result += "\n}\n" return result +# The wrapper to the reference CBLAS routines (for performance/correctness testing) +def wrapper_cblas(routines): + result = "" + for routine in routines: + result += "\n// Forwards the Netlib BLAS calls for %s\n" % (routine.ShortNames()) + for flavour in routine.flavours: + indent = " "*(10 + routine.Length()) + result += routine.RoutineHeaderWrapperC(flavour, False, 12)+" {\n" + arguments = routine.ArgumentsWrapperC(flavour) + + # 
Double-precision scalars + for scalar in routine.scalars: + if flavour.IsComplex(scalar): + result += " const auto "+scalar+"_array = std::vector<"+flavour.buffertype[:-1]+">{"+scalar+".real(), "+scalar+".imag()};\n" + + # Special case for scalar outputs + assignment = "" + postfix = "" + extra_argument = "" + for output_buffer in routine.outputs: + if output_buffer in routine.ScalarBuffersFirst(): + if flavour in [C,Z]: + postfix += "_sub" + indent += " " + extra_argument += ",\n"+indent+"reinterpret_cast(&"+output_buffer+"_buffer["+output_buffer+"_offset])" + else: + assignment = output_buffer+"_buffer["+output_buffer+"_offset] = " + indent += " "*len(assignment) + + result += " "+assignment+"cblas_"+flavour.name.lower()+routine.name+postfix+"(" + result += (",\n"+indent).join([a for a in arguments]) + result += extra_argument+");" + result += "\n}\n" + return result + # ================================================================================================== # Checks for the number of command-line arguments @@ -251,9 +287,10 @@ files = [ path_clblast+"/include/clblast_c.h", path_clblast+"/src/clblast_c.cc", path_clblast+"/test/wrapper_clblas.h", + path_clblast+"/test/wrapper_cblas.h", ] -header_lines = [84, 65, 93, 22, 22] -footer_lines = [6, 3, 9, 2, 6] +header_lines = [84, 65, 93, 22, 22, 31] +footer_lines = [6, 3, 9, 2, 6, 6] # Checks whether the command-line arguments are valid; exists otherwise for f in files: @@ -287,6 +324,8 @@ for i in xrange(0,len(files)): body += clblast_c_cc(routines[level-1]) if i == 4: body += wrapper_clblas(routines[level-1]) + if i == 5: + body += wrapper_cblas(routines[level-1]) f.write("".join(file_header)) f.write(body) f.write("".join(file_footer)) diff --git a/scripts/generator/routine.py b/scripts/generator/routine.py index 02040583..fffa19f6 100644 --- a/scripts/generator/routine.py +++ b/scripts/generator/routine.py @@ -28,7 +28,7 @@ def OptionToCLBlast(x): }[x] # As above, but for clBLAS data-types -def 
OptionToWrapper(x): +def OptionToWrapperCL(x): return { 'layout': "clblasOrder", 'a_transpose': "clblasTranspose", @@ -39,6 +39,18 @@ def OptionToWrapper(x): 'diagonal': "clblasDiag", }[x] +# As above, but for CBLAS data-types +def OptionToWrapperC(x): + return { + 'layout': "CBLAS_ORDER", + 'a_transpose': "CBLAS_TRANSPOSE", + 'b_transpose': "CBLAS_TRANSPOSE", + 'ab_transpose': "CBLAS_TRANSPOSE", + 'side': "CBLAS_SIDE", + 'triangle': "CBLAS_UPLO", + 'diagonal': "CBLAS_DIAG", + }[x] + # ================================================================================================== # Class holding routine-specific information (e.g. name, which arguments, which precisions) @@ -119,6 +131,16 @@ class Routine(): return [", ".join(a+b+c)] return [] + # As above but as vectors + def BufferDefVector(self, name, flavour): + prefix = "const " if (name in self.inputs) else "" + if (name in self.inputs) or (name in self.outputs): + a = [prefix+"std::vector<"+flavour.buffertype+">& "+name+"_buffer"] + b = ["const size_t "+name+"_offset"] + c = ["const size_t "+name+"_"+self.Postfix(name)] if (name not in self.BuffersWithoutLdInc()) else [] + return [", ".join(a+b+c)] + return [] + # As above but with Claduc buffers def BufferCladuc(self, name): if (name in self.inputs) or (name in self.outputs): @@ -129,7 +151,7 @@ class Routine(): return [] # As above but with a static cast for clBLAS wrapper - def BufferWrapper(self, name): + def BufferWrapperCL(self, name): if (name in self.inputs) or (name in self.outputs): a = [name+"_buffer"] b = [name+"_offset"] @@ -141,6 +163,24 @@ class Routine(): return [", ".join(a+b+c)] return [] + # As above but with a static cast for CBLAS wrapper + def BufferWrapperC(self, name, flavour): + prefix = "const " if (name in self.inputs) else "" + if (name in self.inputs) or (name in self.outputs): + if name == "sy1": + a = [name+"_buffer["+name+"_offset]"] + elif flavour.precision_name in ["C","Z"]: + a = 
["reinterpret_cast<"+prefix+flavour.buffertype[:-1]+"*>(&"+name+"_buffer["+name+"_offset])"] + else: + a = ["&"+name+"_buffer["+name+"_offset]"] + c = [] + if (name in ["x","y"]): + c = ["static_cast<int>("+name+"_"+self.Postfix(name)+")"] + elif (name in ["a","b","c"]): + c = [name+"_"+self.Postfix(name)] + return [", ".join(a+c)] + return [] + # As above, but only data-types def BufferType(self, name): prefix = "const " if (name in self.inputs) else "" @@ -179,6 +219,14 @@ class Routine(): return [name] return [] + # Retrieves the use of a scalar for CBLAS (alpha/beta) + def ScalarUseWrapperC(self, name, flavour): + if name in self.scalars: + if flavour.IsComplex(name): + return [name+"_array.data()"] + return [name] + return [] + # Retrieves the definition of a scalar (alpha/beta) def ScalarDef(self, name, flavour): if name in self.scalars: @@ -246,9 +294,16 @@ class Routine(): return [] # As above, but now using clBLAS data-types - def OptionsDefWrapper(self): + def OptionsDefWrapperCL(self): + if self.options: + definitions = ["const "+OptionToWrapperCL(o)+" "+o for o in self.options] + return [", ".join(definitions)] + return [] + + # As above, but now using CBLAS data-types + def OptionsDefWrapperC(self): if self.options: - definitions = ["const "+OptionToWrapper(o)+" "+o for o in self.options] + definitions = ["const "+OptionToWrapperC(o)+" "+o for o in self.options] return [", ".join(definitions)] return [] @@ -284,16 +339,26 @@ class Routine(): list(chain(*[self.ScalarUse(s, flavour) for s in self.OtherScalars()]))) # As above, but for the clBLAS wrapper - def ArgumentsWrapper(self, flavour): + def ArgumentsWrapperCL(self, flavour): return (self.Options() + self.Sizes() + - list(chain(*[self.BufferWrapper(b) for b in self.ScalarBuffersFirst()])) + + list(chain(*[self.BufferWrapperCL(b) for b in self.ScalarBuffersFirst()])) + self.ScalarUseWrapper("alpha", flavour) + - list(chain(*[self.BufferWrapper(b) for b in self.BuffersFirst()])) + + 
list(chain(*[self.BufferWrapperCL(b) for b in self.BuffersFirst()])) + self.ScalarUseWrapper("beta", flavour) + - list(chain(*[self.BufferWrapper(b) for b in self.BuffersSecond()])) + - list(chain(*[self.BufferWrapper(b) for b in self.ScalarBuffersSecond()])) + + list(chain(*[self.BufferWrapperCL(b) for b in self.BuffersSecond()])) + + list(chain(*[self.BufferWrapperCL(b) for b in self.ScalarBuffersSecond()])) + list(chain(*[self.ScalarUseWrapper(s, flavour) for s in self.OtherScalars()]))) + # As above, but for the CBLAS wrapper + def ArgumentsWrapperC(self, flavour): + return (self.Options() + self.Sizes() + + self.ScalarUseWrapperC("alpha", flavour) + + list(chain(*[self.BufferWrapperC(b, flavour) for b in self.BuffersFirst()])) + + self.ScalarUseWrapperC("beta", flavour) + + list(chain(*[self.BufferWrapperC(b, flavour) for b in self.BuffersSecond()])) + + list(chain(*[self.BufferWrapperC(b, flavour) for b in self.ScalarBuffersSecond()])) + + list(chain(*[self.ScalarUseWrapperC(s, flavour) for s in self.OtherScalars()]))) + # Retrieves a combination of all the argument definitions def ArgumentsDef(self, flavour): return (self.OptionsDef() + self.SizesDef() + @@ -306,8 +371,8 @@ class Routine(): list(chain(*[self.ScalarDef(s, flavour) for s in self.OtherScalars()]))) # As above, but clBLAS wrapper plain datatypes - def ArgumentsDefWrapper(self, flavour): - return (self.OptionsDefWrapper() + self.SizesDef() + + def ArgumentsDefWrapperCL(self, flavour): + return (self.OptionsDefWrapperCL() + self.SizesDef() + list(chain(*[self.BufferDef(b) for b in self.ScalarBuffersFirst()])) + self.ScalarDefPlain("alpha", flavour) + list(chain(*[self.BufferDef(b) for b in self.BuffersFirst()])) + @@ -315,6 +380,17 @@ class Routine(): list(chain(*[self.BufferDef(b) for b in self.BuffersSecond()])) + list(chain(*[self.BufferDef(b) for b in self.ScalarBuffersSecond()])) + list(chain(*[self.ScalarDefPlain(s, flavour) for s in self.OtherScalars()]))) + + # As above, but CBLAS wrapper 
plain datatypes + def ArgumentsDefWrapperC(self, flavour): + return (self.OptionsDefWrapperC() + self.SizesDef() + + list(chain(*[self.BufferDefVector(b, flavour) for b in self.ScalarBuffersFirst()])) + + self.ScalarDefPlain("alpha", flavour) + + list(chain(*[self.BufferDefVector(b, flavour) for b in self.BuffersFirst()])) + + self.ScalarDefPlain("beta", flavour) + + list(chain(*[self.BufferDefVector(b, flavour) for b in self.BuffersSecond()])) + + list(chain(*[self.BufferDefVector(b, flavour) for b in self.ScalarBuffersSecond()])) + + list(chain(*[self.ScalarDefPlain(s, flavour) for s in self.OtherScalars()]))) # Retrieves a combination of all the argument types def ArgumentsType(self, flavour): @@ -356,7 +432,7 @@ class Routine(): return result # As above, but now for the clBLAS wrapper - def RoutineHeaderWrapper(self, flavour, def_only, spaces): + def RoutineHeaderWrapperCL(self, flavour, def_only, spaces): template = "<"+flavour.template+">" if self.NoScalars() and not def_only else "" indent = " "*(spaces + self.Length() + len(template)) result = "" @@ -366,9 +442,16 @@ class Routine(): result += flavour.name result += ">\n" result += "clblasStatus clblasX"+self.name+template+"(" - result += (",\n"+indent).join([a for a in self.ArgumentsDefWrapper(flavour)]) + result += (",\n"+indent).join([a for a in self.ArgumentsDefWrapperCL(flavour)]) result += ",\n"+indent+"cl_uint num_queues, cl_command_queue *queues" result += ",\n"+indent+"cl_uint num_wait_events, const cl_event *wait_events, cl_event *events)" return result + # As above, but now for the CBLAS wrapper + def RoutineHeaderWrapperC(self, flavour, def_only, spaces): + indent = " "*(spaces + self.Length()) + result = "void cblasX"+self.name+"(" + result += (",\n"+indent).join([a for a in self.ArgumentsDefWrapperC(flavour)])+")" + return result + # ================================================================================================== -- cgit v1.2.3