summary | refs | log | tree | commit | diff
path: root/scripts
diff options
context:
space:
mode:
author cnugteren <web@cedricnugteren.nl> 2016-04-01 22:36:39 -0700
committer cnugteren <web@cedricnugteren.nl> 2016-04-01 22:36:39 -0700
commit 5c83217cf256984573924e8f89c46f393a5fcfcd (patch)
tree b260ec46e10e12ff63d465212652523c3cfa7bc3 /scripts
parent a2056f2216526989f423a74e4bcd016dac9424f4 (diff)
Added a wrapper for CBLAS libraries for performance/correctness testing
Diffstat (limited to 'scripts')
-rw-r--r-- scripts/generator/datatype.py 5
-rw-r--r-- scripts/generator/generator.py 53
-rw-r--r-- scripts/generator/routine.py 109
3 files changed, 147 insertions, 20 deletions
diff --git a/scripts/generator/datatype.py b/scripts/generator/datatype.py
index 9323bc4d..5a58ab53 100644
--- a/scripts/generator/datatype.py
+++ b/scripts/generator/datatype.py
@@ -58,5 +58,10 @@ class DataType():
return "<"+self.buffertype+","+self.beta_cpp+">, "+self.buffertype+", "+self.beta_cpp
return "<"+self.buffertype+">, "+self.buffertype+", "+self.beta_cpp
+ # Returns whether the given scalar ("alpha" or "beta") is of a complex type (FLT2 or DBL2)
+ def IsComplex(self, scalar):
+ return ((scalar == "alpha" and self.alpha_cpp in [FLT2, DBL2]) or
+ (scalar == "beta" and self.beta_cpp in [FLT2, DBL2]))
+
# ==================================================================================================
diff --git a/scripts/generator/generator.py b/scripts/generator/generator.py
index 6e2b2ed2..36a9bf40 100644
--- a/scripts/generator/generator.py
+++ b/scripts/generator/generator.py
@@ -8,12 +8,13 @@
# Cedric Nugteren <www.cedricnugteren.nl>
#
# This script automatically generates the bodies of the following files, creating the full CLBlast
-# API interface and implementation (C, C++, and clBLAS wrapper):
+# API interface and implementation (C, C++, and reference BLAS wrappers):
# clblast.h
# clblast.cc
# clblast_c.h
# clblast_c.cc
# wrapper_clblas.h
+# wrapper_cblas.h
# It also generates the main functions for the correctness and performance tests as found in
# test/correctness/routines/levelX/xYYYY.cc
# test/performance/routines/levelX/xYYYY.cc
@@ -55,7 +56,7 @@ TU = DataType("TU", "typename T, typename U", "T,U", ["T", "U", "T", "U"], "T")
routines = [
[ # Level 1: vector-vector
Routine(False, "1", "rotg", T, [S,D], [], [], [], ["sa","sb","sc","ss"], [], "", "Generate givens plane rotation"),
- Routine(False, "1", "rotmg", T, [S,D], [], [], [], ["sd1","sd2","sx1","sy1","sparam"], [], "", "Generate modified givens plane rotation"),
+ Routine(False, "1", "rotmg", T, [S,D], [], [], ["sy1"], ["sd1","sd2","sx1","sparam"], [], "", "Generate modified givens plane rotation"),
Routine(False, "1", "rot", T, [S,D], ["n"], [], [], ["x","y"], ["cos","sin"], "", "Apply givens plane rotation"),
Routine(False, "1", "rotm", T, [S,D], ["n"], [], [], ["x","y","sparam"], [], "", "Apply modified givens plane rotation"),
Routine(True, "1", "swap", T, [S,D,C,Z], ["n"], [], [], ["x","y"], [], "", "Swap two vectors"),
@@ -220,11 +221,11 @@ def wrapper_clblas(routines):
for routine in routines:
result += "\n// Forwards the clBLAS calls for %s\n" % (routine.ShortNames())
if routine.NoScalars():
- result += routine.RoutineHeaderWrapper(routine.template, True, 21)+";\n"
+ result += routine.RoutineHeaderWrapperCL(routine.template, True, 21)+";\n"
for flavour in routine.flavours:
indent = " "*(17 + routine.Length())
- result += routine.RoutineHeaderWrapper(flavour, False, 21)+" {\n"
- arguments = routine.ArgumentsWrapper(flavour)
+ result += routine.RoutineHeaderWrapperCL(flavour, False, 21)+" {\n"
+ arguments = routine.ArgumentsWrapperCL(flavour)
if routine.scratch:
result += " auto queue = Queue(queues[0]);\n"
result += " auto context = queue.GetContext();\n"
@@ -236,6 +237,41 @@ def wrapper_clblas(routines):
result += "\n}\n"
return result
+# The wrapper to the reference CBLAS routines (for performance/correctness testing)
+def wrapper_cblas(routines):
+ result = ""
+ for routine in routines:
+ result += "\n// Forwards the Netlib BLAS calls for %s\n" % (routine.ShortNames())
+ for flavour in routine.flavours:
+ indent = " "*(10 + routine.Length())
+ result += routine.RoutineHeaderWrapperC(flavour, False, 12)+" {\n"
+ arguments = routine.ArgumentsWrapperC(flavour)
+
+ # Complex scalars: passed to CBLAS as a two-element {real, imaginary} array
+ for scalar in routine.scalars:
+ if flavour.IsComplex(scalar):
+ result += " const auto "+scalar+"_array = std::vector<"+flavour.buffertype[:-1]+">{"+scalar+".real(), "+scalar+".imag()};\n"
+
+ # Special case for scalar outputs
+ assignment = ""
+ postfix = ""
+ extra_argument = ""
+ for output_buffer in routine.outputs:
+ if output_buffer in routine.ScalarBuffersFirst():
+ if flavour in [C,Z]:
+ postfix += "_sub"
+ indent += " "
+ extra_argument += ",\n"+indent+"reinterpret_cast<return_pointer_"+flavour.buffertype[:-1]+">(&"+output_buffer+"_buffer["+output_buffer+"_offset])"
+ else:
+ assignment = output_buffer+"_buffer["+output_buffer+"_offset] = "
+ indent += " "*len(assignment)
+
+ result += " "+assignment+"cblas_"+flavour.name.lower()+routine.name+postfix+"("
+ result += (",\n"+indent).join([a for a in arguments])
+ result += extra_argument+");"
+ result += "\n}\n"
+ return result
+
# ==================================================================================================
# Checks for the number of command-line arguments
@@ -251,9 +287,10 @@ files = [
path_clblast+"/include/clblast_c.h",
path_clblast+"/src/clblast_c.cc",
path_clblast+"/test/wrapper_clblas.h",
+ path_clblast+"/test/wrapper_cblas.h",
]
-header_lines = [84, 65, 93, 22, 22]
-footer_lines = [6, 3, 9, 2, 6]
+header_lines = [84, 65, 93, 22, 22, 31]
+footer_lines = [6, 3, 9, 2, 6, 6]
# Checks whether the command-line arguments are valid; exits otherwise
for f in files:
@@ -287,6 +324,8 @@ for i in xrange(0,len(files)):
body += clblast_c_cc(routines[level-1])
if i == 4:
body += wrapper_clblas(routines[level-1])
+ if i == 5:
+ body += wrapper_cblas(routines[level-1])
f.write("".join(file_header))
f.write(body)
f.write("".join(file_footer))
diff --git a/scripts/generator/routine.py b/scripts/generator/routine.py
index 02040583..fffa19f6 100644
--- a/scripts/generator/routine.py
+++ b/scripts/generator/routine.py
@@ -28,7 +28,7 @@ def OptionToCLBlast(x):
}[x]
# As above, but for clBLAS data-types
-def OptionToWrapper(x):
+def OptionToWrapperCL(x):
return {
'layout': "clblasOrder",
'a_transpose': "clblasTranspose",
@@ -39,6 +39,18 @@ def OptionToWrapper(x):
'diagonal': "clblasDiag",
}[x]
+# As above, but for CBLAS data-types
+def OptionToWrapperC(x):
+ return {
+ 'layout': "CBLAS_ORDER",
+ 'a_transpose': "CBLAS_TRANSPOSE",
+ 'b_transpose': "CBLAS_TRANSPOSE",
+ 'ab_transpose': "CBLAS_TRANSPOSE",
+ 'side': "CBLAS_SIDE",
+ 'triangle': "CBLAS_UPLO",
+ 'diagonal': "CBLAS_DIAG",
+ }[x]
+
# ==================================================================================================
# Class holding routine-specific information (e.g. name, which arguments, which precisions)
@@ -119,6 +131,16 @@ class Routine():
return [", ".join(a+b+c)]
return []
+ # As above but as vectors
+ def BufferDefVector(self, name, flavour):
+ prefix = "const " if (name in self.inputs) else ""
+ if (name in self.inputs) or (name in self.outputs):
+ a = [prefix+"std::vector<"+flavour.buffertype+">& "+name+"_buffer"]
+ b = ["const size_t "+name+"_offset"]
+ c = ["const size_t "+name+"_"+self.Postfix(name)] if (name not in self.BuffersWithoutLdInc()) else []
+ return [", ".join(a+b+c)]
+ return []
+
# As above but with Claduc buffers
def BufferCladuc(self, name):
if (name in self.inputs) or (name in self.outputs):
@@ -129,7 +151,7 @@ class Routine():
return []
# As above but with a static cast for clBLAS wrapper
- def BufferWrapper(self, name):
+ def BufferWrapperCL(self, name):
if (name in self.inputs) or (name in self.outputs):
a = [name+"_buffer"]
b = [name+"_offset"]
@@ -141,6 +163,24 @@ class Routine():
return [", ".join(a+b+c)]
return []
+ # As above but for the CBLAS wrapper: pointer into the host vector (reinterpret-cast for complex types) plus a static cast of the increment
+ def BufferWrapperC(self, name, flavour):
+ prefix = "const " if (name in self.inputs) else ""
+ if (name in self.inputs) or (name in self.outputs):
+ if name == "sy1":
+ a = [name+"_buffer["+name+"_offset]"]
+ elif flavour.precision_name in ["C","Z"]:
+ a = ["reinterpret_cast<"+prefix+flavour.buffertype[:-1]+"*>(&"+name+"_buffer["+name+"_offset])"]
+ else:
+ a = ["&"+name+"_buffer["+name+"_offset]"]
+ c = []
+ if (name in ["x","y"]):
+ c = ["static_cast<int>("+name+"_"+self.Postfix(name)+")"]
+ elif (name in ["a","b","c"]):
+ c = [name+"_"+self.Postfix(name)]
+ return [", ".join(a+c)]
+ return []
+
# As above, but only data-types
def BufferType(self, name):
prefix = "const " if (name in self.inputs) else ""
@@ -179,6 +219,14 @@ class Routine():
return [name]
return []
+ # Retrieves the use of a scalar for CBLAS (alpha/beta)
+ def ScalarUseWrapperC(self, name, flavour):
+ if name in self.scalars:
+ if flavour.IsComplex(name):
+ return [name+"_array.data()"]
+ return [name]
+ return []
+
# Retrieves the definition of a scalar (alpha/beta)
def ScalarDef(self, name, flavour):
if name in self.scalars:
@@ -246,9 +294,16 @@ class Routine():
return []
# As above, but now using clBLAS data-types
- def OptionsDefWrapper(self):
+ def OptionsDefWrapperCL(self):
+ if self.options:
+ definitions = ["const "+OptionToWrapperCL(o)+" "+o for o in self.options]
+ return [", ".join(definitions)]
+ return []
+
+ # As above, but now using CBLAS data-types
+ def OptionsDefWrapperC(self):
if self.options:
- definitions = ["const "+OptionToWrapper(o)+" "+o for o in self.options]
+ definitions = ["const "+OptionToWrapperC(o)+" "+o for o in self.options]
return [", ".join(definitions)]
return []
@@ -284,16 +339,26 @@ class Routine():
list(chain(*[self.ScalarUse(s, flavour) for s in self.OtherScalars()])))
# As above, but for the clBLAS wrapper
- def ArgumentsWrapper(self, flavour):
+ def ArgumentsWrapperCL(self, flavour):
return (self.Options() + self.Sizes() +
- list(chain(*[self.BufferWrapper(b) for b in self.ScalarBuffersFirst()])) +
+ list(chain(*[self.BufferWrapperCL(b) for b in self.ScalarBuffersFirst()])) +
self.ScalarUseWrapper("alpha", flavour) +
- list(chain(*[self.BufferWrapper(b) for b in self.BuffersFirst()])) +
+ list(chain(*[self.BufferWrapperCL(b) for b in self.BuffersFirst()])) +
self.ScalarUseWrapper("beta", flavour) +
- list(chain(*[self.BufferWrapper(b) for b in self.BuffersSecond()])) +
- list(chain(*[self.BufferWrapper(b) for b in self.ScalarBuffersSecond()])) +
+ list(chain(*[self.BufferWrapperCL(b) for b in self.BuffersSecond()])) +
+ list(chain(*[self.BufferWrapperCL(b) for b in self.ScalarBuffersSecond()])) +
list(chain(*[self.ScalarUseWrapper(s, flavour) for s in self.OtherScalars()])))
+ # As above, but for the CBLAS wrapper
+ def ArgumentsWrapperC(self, flavour):
+ return (self.Options() + self.Sizes() +
+ self.ScalarUseWrapperC("alpha", flavour) +
+ list(chain(*[self.BufferWrapperC(b, flavour) for b in self.BuffersFirst()])) +
+ self.ScalarUseWrapperC("beta", flavour) +
+ list(chain(*[self.BufferWrapperC(b, flavour) for b in self.BuffersSecond()])) +
+ list(chain(*[self.BufferWrapperC(b, flavour) for b in self.ScalarBuffersSecond()])) +
+ list(chain(*[self.ScalarUseWrapperC(s, flavour) for s in self.OtherScalars()])))
+
# Retrieves a combination of all the argument definitions
def ArgumentsDef(self, flavour):
return (self.OptionsDef() + self.SizesDef() +
@@ -306,8 +371,8 @@ class Routine():
list(chain(*[self.ScalarDef(s, flavour) for s in self.OtherScalars()])))
# As above, but clBLAS wrapper plain datatypes
- def ArgumentsDefWrapper(self, flavour):
- return (self.OptionsDefWrapper() + self.SizesDef() +
+ def ArgumentsDefWrapperCL(self, flavour):
+ return (self.OptionsDefWrapperCL() + self.SizesDef() +
list(chain(*[self.BufferDef(b) for b in self.ScalarBuffersFirst()])) +
self.ScalarDefPlain("alpha", flavour) +
list(chain(*[self.BufferDef(b) for b in self.BuffersFirst()])) +
@@ -315,6 +380,17 @@ class Routine():
list(chain(*[self.BufferDef(b) for b in self.BuffersSecond()])) +
list(chain(*[self.BufferDef(b) for b in self.ScalarBuffersSecond()])) +
list(chain(*[self.ScalarDefPlain(s, flavour) for s in self.OtherScalars()])))
+
+ # As above, but CBLAS wrapper plain datatypes
+ def ArgumentsDefWrapperC(self, flavour):
+ return (self.OptionsDefWrapperC() + self.SizesDef() +
+ list(chain(*[self.BufferDefVector(b, flavour) for b in self.ScalarBuffersFirst()])) +
+ self.ScalarDefPlain("alpha", flavour) +
+ list(chain(*[self.BufferDefVector(b, flavour) for b in self.BuffersFirst()])) +
+ self.ScalarDefPlain("beta", flavour) +
+ list(chain(*[self.BufferDefVector(b, flavour) for b in self.BuffersSecond()])) +
+ list(chain(*[self.BufferDefVector(b, flavour) for b in self.ScalarBuffersSecond()])) +
+ list(chain(*[self.ScalarDefPlain(s, flavour) for s in self.OtherScalars()])))
# Retrieves a combination of all the argument types
def ArgumentsType(self, flavour):
@@ -356,7 +432,7 @@ class Routine():
return result
# As above, but now for the clBLAS wrapper
- def RoutineHeaderWrapper(self, flavour, def_only, spaces):
+ def RoutineHeaderWrapperCL(self, flavour, def_only, spaces):
template = "<"+flavour.template+">" if self.NoScalars() and not def_only else ""
indent = " "*(spaces + self.Length() + len(template))
result = ""
@@ -366,9 +442,16 @@ class Routine():
result += flavour.name
result += ">\n"
result += "clblasStatus clblasX"+self.name+template+"("
- result += (",\n"+indent).join([a for a in self.ArgumentsDefWrapper(flavour)])
+ result += (",\n"+indent).join([a for a in self.ArgumentsDefWrapperCL(flavour)])
result += ",\n"+indent+"cl_uint num_queues, cl_command_queue *queues"
result += ",\n"+indent+"cl_uint num_wait_events, const cl_event *wait_events, cl_event *events)"
return result
+ # As above, but now for the CBLAS wrapper
+ def RoutineHeaderWrapperC(self, flavour, def_only, spaces):
+ indent = " "*(spaces + self.Length())
+ result = "void cblasX"+self.name+"("
+ result += (",\n"+indent).join([a for a in self.ArgumentsDefWrapperC(flavour)])+")"
+ return result
+
# ==================================================================================================