diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2017-04-05 21:27:25 +0200 |
---|---|---|
committer | Cedric Nugteren <web@cedricnugteren.nl> | 2017-04-05 21:27:25 +0200 |
commit | 674ff96fdf79b171ba4d100fefff437d7943ddc9 (patch) | |
tree | bd4a41e1803cb2ec32f50c6a8ac277fe898f61c9 /scripts/generator | |
parent | af9a521042ffc2823f60e12018db9e0a29120628 (diff) |
Added a first version of a cuBLAS wrapper (WIP)
Diffstat (limited to 'scripts/generator')
-rwxr-xr-x | scripts/generator/generator.py | 11 | ||||
-rw-r--r-- | scripts/generator/generator/convert.py | 13 | ||||
-rw-r--r-- | scripts/generator/generator/cpp.py | 46 | ||||
-rw-r--r-- | scripts/generator/generator/routine.py | 85 |
4 files changed, 149 insertions, 6 deletions
diff --git a/scripts/generator/generator.py b/scripts/generator/generator.py index 086b27d3..3f3fab62 100755 --- a/scripts/generator/generator.py +++ b/scripts/generator/generator.py @@ -38,11 +38,12 @@ FILES = [ "/src/clblast_c.cpp", "/test/wrapper_clblas.hpp", "/test/wrapper_cblas.hpp", + "/test/wrapper_cublas.hpp", "/include/clblast_netlib_c.h", "/src/clblast_netlib_c.cpp", ] -HEADER_LINES = [123, 76, 126, 23, 29, 41, 65, 32] -FOOTER_LINES = [25, 138, 27, 38, 6, 6, 9, 2] +HEADER_LINES = [123, 76, 126, 23, 29, 41, 29, 65, 32] +FOOTER_LINES = [25, 138, 27, 38, 6, 6, 6, 9, 2] HEADER_LINES_DOC = 0 FOOTER_LINES_DOC = 63 @@ -194,7 +195,7 @@ def main(argv): # Re-writes the body of the file with open(library_root + FILES[i], "w") as f: body = "" - levels = [1, 2, 3] if (i == 4 or i == 5) else [1, 2, 3, 4] + levels = [1, 2, 3] if (i == 4 or i == 5 or i == 6) else [1, 2, 3, 4] for level in levels: body += cpp.LEVEL_SEPARATORS[level - 1] + "\n" for routine in ROUTINES[level - 1]: @@ -211,9 +212,11 @@ def main(argv): if i == 5: body += cpp.wrapper_cblas(routine) if i == 6: + body += cpp.wrapper_cublas(routine) + if i == 7: if not routine.batched: body += cpp.clblast_netlib_c_h(routine) - if i == 7: + if i == 8: if not routine.batched: body += cpp.clblast_netlib_c_cc(routine) f.write("".join(file_header)) diff --git a/scripts/generator/generator/convert.py b/scripts/generator/generator/convert.py index c0309ec3..80b6f338 100644 --- a/scripts/generator/generator/convert.py +++ b/scripts/generator/generator/convert.py @@ -56,6 +56,19 @@ def option_to_cblas(x): }[x] +def option_to_cublas(x): + """As above, but for clBLAS data-types""" + return { + 'layout': "cublas_has_no_layout", + 'a_transpose': "cublasOperation_t", + 'b_transpose': "cublasOperation_t", + 'ab_transpose': "cublasOperation_t", + 'side': "cublasSideMode_t", + 'triangle': "cublasFillMode_t", + 'diagonal': "cublasDiagType_t", + }[x] + + def option_to_documentation(x): """Translates an option name to a documentation string""" return { diff --git a/scripts/generator/generator/cpp.py b/scripts/generator/generator/cpp.py index 03da7985..49240095 100644 --- a/scripts/generator/generator/cpp.py +++ b/scripts/generator/generator/cpp.py @@ -290,6 +290,52 @@ def wrapper_cblas(routine): return result +def wrapper_cublas(routine): + """The wrapper to the reference cuBLAS routines (for performance/correctness testing)""" + result = "" + if routine.has_tests: + result += NL + "// Forwards the cuBLAS calls for %s" % routine.short_names_tested() + NL + if routine.no_scalars(): + result += routine.routine_header_wrapper_cublas(routine.template, True, 23) + ";" + NL + for flavour in routine.flavours: + result += routine.routine_header_wrapper_cublas(flavour, False, 23) + " {" + NL + + # There is a version available in cuBLAS + if flavour.precision_name in ["S", "D", "C", "Z"]: + indent = " " * (24 + routine.length()) + arguments = routine.arguments_wrapper_cublas(flavour) + result += " cublasHandle_t handle;" + NL + result += " auto status = cublas" + flavour.name + routine.name + "(handle, " + result += ("," + NL + indent).join([a for a in arguments]) + ");" + NL + result += " cublasDestroy(handle);" + NL + result += " return status;" + + # There is no cuBLAS available, forward the call to one of the available functions + else: # Half-precision + result += " return CUBLAS_STATUS_NOT_SUPPORTED;" + # indent = " " * (24 + routine.length()) + + # # Convert to float (note: also integer buffers are stored as half/float) + # for buf in routine.inputs + routine.outputs: + # result += " auto " + buf + "_buffer_bis = HalfToFloatBuffer(" + buf + "_buffer, queues[0]);" + NL + + # # Call the float routine + # result += " cublasHandle_t handle;" + NL + # result += " auto status = cublasX" + routine.name + "(handle," + # result += ("," + NL + indent).join([a for a in routine.arguments_half()]) + ");" + NL + # result += " cublasDestroy(handle);" + NL + # result += " return status;" + NL + + # # Convert back to half + # for buf in routine.outputs: + # result += " FloatToHalfBuffer(" + buf + "_buffer, " + buf + "_buffer_bis, queues[0]);" + NL + # result += " return status;" + + # Complete + result += NL + "}" + NL + return result + + def performance_test(routine, level_string): """Generates the body of a performance test for a specific routine""" result = "" diff --git a/scripts/generator/generator/routine.py b/scripts/generator/generator/routine.py index 59b2ed73..9414eb50 100644 --- a/scripts/generator/generator/routine.py +++ b/scripts/generator/generator/routine.py @@ -257,7 +257,7 @@ class Routine: return [] def buffer_def_wrapper_cl(self, name, flavour): - """As above but with data-types""" + """As above but for OpenCL""" prefix = "const " if name in self.inputs else "" if name in self.inputs or name in self.outputs: a = [prefix + "Buffer<" + flavour.buffer_type + ">& " + name + "_buffer"] @@ -266,6 +266,16 @@ class Routine: return [", ".join(a + b + c)] return [] + def buffer_def_wrapper_cuda(self, name, flavour): + """As above but for CUDA""" + prefix = "const " if name in self.inputs else "" + if name in self.inputs or name in self.outputs: + a = [prefix + flavour.buffer_type + "* " + name + "_buffer"] + b = ["const size_t " + name + "_offset"] + c = ["const size_t " + name + "_" + self.postfix(name)] if name not in self.buffers_without_ld_inc() else [] + return [", ".join(a + b + c)] + return [] + def buffer_def_vector(self, name, flavour): """As above but as vectors""" prefix = "const " if name in self.inputs else "" @@ -329,6 +339,18 @@ class Routine: return [", ".join(a + c)] return [] + def buffer_wrapper_cublas(self, name): + """As above but for cuBLAS the wrapper""" + if name in self.inputs or name in self.outputs: + a = ["&" + name + "_buffer[" + name + "_offset]"] + c = [] + if name in ["x", "y"]: + c = ["static_cast<int>(" + name + "_" + self.postfix(name) + ")"] + elif name in ["a", "b", "c"]: + c = [name + "_" + self.postfix(name)] + return [", ".join(a + c)] + return [] + def buffer_type(self, name): """As above, but only data-types""" prefix = "const " if (name in self.inputs) else "" @@ -399,6 +421,16 @@ class Routine: return [name] return [] + def scalar_use_wrapper_by_ref(self, name, flavour): + """As above, but for the cuBLAS wrapper""" + if name in self.scalars: + if name == "alpha": + return ["&" + flavour.use_alpha_opencl()] + elif name == "beta": + return ["&" + flavour.use_beta_opencl()] + return [name] + return [] + def scalar_use_wrapper_cblas(self, name, flavour): """As above, but for the CBLAS wrapper""" if name in self.scalars: @@ -465,6 +497,12 @@ class Routine: return [", ".join([s for s in self.sizes])] return [] + def sizes_list_as_int(self): + """Retrieves a list of comma-separated sizes (m, n, k) cast to integers""" + if self.sizes: + return [", ".join(["static_cast<int>(" + s + ")" for s in self.sizes])] + return [] + def sizes_def(self): """Retrieves the definition of the sizes (m,n,k)""" if self.sizes: @@ -531,6 +569,13 @@ class Routine: return [", ".join(definitions)] return [] + def options_def_wrapper_cublas(self): + """As above, but now using cuBLAS data-types""" + if self.options: + definitions = ["const " + convert.option_to_cublas(o) + " " + o for o in self.options] + return [", ".join(definitions)] + return [] + def options_type(self): """Retrieves the types of the options (layout, transpose, side, etc.)""" if self.options: @@ -615,7 +660,7 @@ class Routine: def arguments_wrapper_cblas(self, flavour): """As above, but for the CBLAS wrapper""" - return (self.options_list() + self.sizes_list() + + return (self.options_list() + self.sizes_list_as_int() + self.scalar_use_wrapper_cblas("alpha", flavour) + list(chain(*[self.buffer_wrapper_cblas(b, flavour) for b in self.buffers_first()])) + self.scalar_use_wrapper_cblas("beta", flavour) + @@ -623,6 +668,17 @@ class Routine: list(chain(*[self.buffer_wrapper_cblas(b, flavour) for b in self.scalar_buffers_second()])) + list(chain(*[self.scalar_use_wrapper_cblas(s, flavour) for s in self.other_scalars()]))) + def arguments_wrapper_cublas(self, flavour): + """As above, but for the cuBLAS wrapper""" + return (self.options_list() + self.sizes_list_as_int() + + list(chain(*[self.buffer_wrapper_cublas(b) for b in self.scalar_buffers_first()])) + + self.scalar_use_wrapper_by_ref("alpha", flavour) + + list(chain(*[self.buffer_wrapper_cublas(b) for b in self.buffers_first()])) + + self.scalar_use_wrapper_by_ref("beta", flavour) + + list(chain(*[self.buffer_wrapper_cublas(b) for b in self.buffers_second()])) + + list(chain(*[self.buffer_wrapper_cublas(b) for b in self.scalar_buffers_second()])) + + list(chain(*[self.scalar_use_wrapper_by_ref(s, flavour) for s in self.other_scalars()]))) + def arguments_def(self, flavour): """Retrieves a combination of all the argument definitions""" return (self.options_def() + self.sizes_def() + @@ -683,6 +739,17 @@ class Routine: list(chain(*[self.buffer_def_vector(b, flavour) for b in self.scalar_buffers_second()])) + list(chain(*[self.scalar_def_plain(s, flavour) for s in self.other_scalars()]))) + def arguments_def_wrapper_cublas(self, flavour): + """As above, but cuBLAS wrapper plain data-types""" + return (self.options_def_wrapper_cublas() + self.sizes_def() + + list(chain(*[self.buffer_def_wrapper_cuda(b, flavour) for b in self.scalar_buffers_first()])) + + self.scalar_def_plain("alpha", flavour) + + list(chain(*[self.buffer_def_wrapper_cuda(b, flavour) for b in self.buffers_first()])) + + self.scalar_def_plain("beta", flavour) + + list(chain(*[self.buffer_def_wrapper_cuda(b, flavour) for b in self.buffers_second()])) + + list(chain(*[self.buffer_def_wrapper_cuda(b, flavour) for b in self.scalar_buffers_second()])) + + list(chain(*[self.scalar_def_plain(s, flavour) for s in self.other_scalars()]))) + def arguments_type(self, flavour): """Retrieves a combination of all the argument types""" return (self.options_type() + self.sizes_type() + @@ -781,3 +848,17 @@ class Routine: result = "void cblasX" + self.name + "(" result += (",\n" + indent).join([a for a in self.arguments_def_wrapper_cblas(flavour)]) + ")" return result + + def routine_header_wrapper_cublas(self, flavour, def_only, spaces): + """As above, but now for the cuBLAS wrapper""" + template = "<" + flavour.template + ">" if self.no_scalars() and not def_only else "" + indent = " " * (spaces + self.length() + len(template)) + result = "" + if self.no_scalars(): + result += "template <" + if def_only: + result += flavour.name + result += ">\n" + result += "cublasStatus_t cublasX" + self.name + template + "(" + result += (",\n" + indent).join([a for a in self.arguments_def_wrapper_cublas(flavour)]) + ")" + return result |