diff options
Diffstat (limited to 'scripts')
-rwxr-xr-x | scripts/generator/generator.py | 12 | ||||
-rw-r--r-- | scripts/generator/generator/cpp.py | 47 | ||||
-rw-r--r-- | scripts/generator/generator/datatype.py | 6 | ||||
-rw-r--r-- | scripts/generator/generator/routine.py | 28 |
4 files changed, 68 insertions, 25 deletions
diff --git a/scripts/generator/generator.py b/scripts/generator/generator.py index df0eaca0..520e3fc8 100755 --- a/scripts/generator/generator.py +++ b/scripts/generator/generator.py @@ -12,6 +12,8 @@ # clblast.cpp # clblast_c.h # clblast_c.cpp +# clblast_cuda.h +# clblast_cuda.cpp # clblast_netlib_c.h # clblast_netlib_c.cpp # wrapper_clblas.h @@ -41,9 +43,11 @@ FILES = [ "/test/wrapper_cublas.hpp", "/include/clblast_netlib_c.h", "/src/clblast_netlib_c.cpp", + "/include/clblast_cuda.h", + "/src/clblast_cuda.cpp", ] -HEADER_LINES = [122, 79, 126, 24, 29, 41, 29, 65, 32] -FOOTER_LINES = [25, 147, 27, 38, 6, 6, 6, 9, 2] +HEADER_LINES = [122, 21, 126, 24, 29, 41, 29, 65, 32, 94, 21] +FOOTER_LINES = [25, 3, 27, 38, 6, 6, 6, 9, 2, 25, 3] HEADER_LINES_DOC = 0 FOOTER_LINES_DOC = 63 @@ -224,6 +228,10 @@ def main(argv): if i == 8: if not routine.batched: body += cpp.clblast_netlib_c_cc(routine) + if i == 9: + body += cpp.clblast_h(routine, cuda=True) + if i == 10: + body += cpp.clblast_cc(routine, cuda=True) f.write("".join(file_header)) f.write(body) f.write("".join(file_footer)) diff --git a/scripts/generator/generator/cpp.py b/scripts/generator/generator/cpp.py index 964b8f3e..2d18655f 100644 --- a/scripts/generator/generator/cpp.py +++ b/scripts/generator/generator/cpp.py @@ -36,22 +36,28 @@ HEADER = NL + SEPARATOR + """ """ + SEPARATOR + NL -def clblast_h(routine): +def clblast_h(routine, cuda=False): """The C++ API header (.h)""" result = NL + "// " + routine.description + ": " + routine.short_names() + NL - result += routine.routine_header_cpp(12, " = nullptr") + ";" + NL + result += routine.routine_header_cpp(12, " = nullptr", cuda) + ";" + NL return result -def clblast_cc(routine): +def clblast_cc(routine, cuda=False): """The C++ API implementation (.cpp)""" indent1 = " " * (15 + routine.length()) result = NL + "// " + routine.description + ": " + routine.short_names() + NL if routine.implemented: - result += routine.routine_header_cpp(12, "") + " {" + NL + result += routine.routine_header_cpp(12, "", cuda) + " {" + NL result += " try {" + NL - result += " auto queue_cpp = Queue(*queue);" + NL - result += " auto routine = X" + routine.plain_name() + "<" + routine.template.template + ">(queue_cpp, event);" + NL + if cuda: + result += " const auto context_cpp = Context(context);" + NL + result += " const auto device_cpp = Device(device);" + NL + result += " auto queue_cpp = Queue(context_cpp, device_cpp);" + NL + else: + result += " auto queue_cpp = Queue(*queue);" + NL + event = "nullptr" if cuda else "event" + result += " auto routine = X" + routine.plain_name() + "<" + routine.template.template + ">(queue_cpp, " + event + ");" + NL if routine.batched: result += " " + (NL + " ").join(routine.batched_transform_to_cpp()) + NL result += " routine.Do" + routine.capitalized_name() + "(" @@ -60,14 +66,22 @@ def clblast_cc(routine): result += " return StatusCode::kSuccess;" + NL result += " } catch (...) { return DispatchException(); }" + NL else: - result += routine.routine_header_type_cpp(12) + " {" + NL + result += routine.routine_header_type_cpp(12, cuda) + " {" + NL result += " return StatusCode::kNotImplemented;" + NL result += "}" + NL for flavour in routine.flavours: indent2 = " " * (34 + routine.length() + len(flavour.template)) result += "template StatusCode PUBLIC_API " + routine.capitalized_name() + "<" + flavour.template + ">(" - result += ("," + NL + indent2).join([a for a in routine.arguments_type(flavour)]) - result += "," + NL + indent2 + "cl_command_queue*, cl_event*);" + NL + arguments = routine.arguments_type(flavour) + if cuda: + arguments = [a.replace("cl_mem", "CUdeviceptr") for a in arguments] + result += ("," + NL + indent2).join([a for a in arguments]) + result += "," + NL + indent2 + if cuda: + result += "const CUcontext, const CUdevice" + else: + result += "cl_command_queue*, cl_event*" + result += ");" + NL return result @@ -364,7 +378,9 @@ def performance_test(routine, level_string): found = False for flavour in routine.flavours: if flavour.precision_name == precision: - result += NL + " clblast::RunClient<clblast::TestX" + routine.plain_name() + flavour.test_template() + extra_template_argument = "0, " if routine.name == "gemm" and not routine.batched else "" + result += NL + " clblast::RunClient<clblast::TestX" + routine.plain_name() + result += flavour.test_template(extra_template_argument) result += ">(argc, argv); break;" + NL found = True if not found: @@ -384,10 +400,13 @@ def correctness_test(routine, level_string): result += "int main(int argc, char *argv[]) {" + NL result += " auto errors = size_t{0};" + NL not_first = "false" - for flavour in routine.flavours: - result += " errors += clblast::RunTests<clblast::TestX" + routine.plain_name() + flavour.test_template() - result += ">(argc, argv, " + not_first + ", \"" + flavour.name + routine.upper_name() + "\");" + NL - not_first = "true" + extra_template_arguments = ["1, ", "2, "] if routine.name == "gemm" and not routine.batched else [""] + for extra_template_argument in extra_template_arguments: + for flavour in routine.flavours: + result += " errors += clblast::RunTests<clblast::TestX" + routine.plain_name() + result += flavour.test_template(extra_template_argument) + result += ">(argc, argv, " + not_first + ", \"" + flavour.name + routine.upper_name() + "\");" + NL + not_first = "true" result += " if (errors > 0) { return 1; } else { return 0; }" + NL result += "}" + NL return result diff --git a/scripts/generator/generator/datatype.py b/scripts/generator/generator/datatype.py index fdb584bc..f2b1c9e3 100644 --- a/scripts/generator/generator/datatype.py +++ b/scripts/generator/generator/datatype.py @@ -70,13 +70,13 @@ class DataType: return self.beta_cpp + "{reinterpret_cast<const double*>(beta)[0], reinterpret_cast<const double*>(beta)[1]}" return "beta" - def test_template(self): + def test_template(self, extra_template_argument): """Returns the template as used in the correctness/performance tests""" buffer_type = "clblast::" + self.buffer_type if self.is_non_standard() else self.buffer_type beta_cpp = "clblast::" + self.beta_cpp if self.beta_cpp in [D_HALF, D_FLOAT2, D_DOUBLE2] else self.beta_cpp if self.buffer_type != self.beta_cpp: - return "<" + buffer_type + "," + self.beta_cpp + ">, " + buffer_type + ", " + beta_cpp - return "<" + buffer_type + ">, " + buffer_type + ", " + beta_cpp + return "<" + extra_template_argument + buffer_type + "," + self.beta_cpp + ">, " + buffer_type + ", " + beta_cpp + return "<" + extra_template_argument + buffer_type + ">, " + buffer_type + ", " + beta_cpp def is_complex(self, scalar): """Current scalar is complex""" diff --git a/scripts/generator/generator/routine.py b/scripts/generator/generator/routine.py index cef7db87..b6b55821 100644 --- a/scripts/generator/generator/routine.py +++ b/scripts/generator/generator/routine.py @@ -802,22 +802,38 @@ class Routine: """Retrieves a list of routine requirements for documentation""" return self.requirements - def routine_header_cpp(self, spaces, default_event): + def routine_header_cpp(self, spaces, default_event, cuda=False): """Retrieves the C++ templated definition for a routine""" indent = " " * (spaces + self.length()) + arguments = self.arguments_def(self.template) + if cuda: + arguments = [a.replace("cl_mem", "CUdeviceptr") for a in arguments] result = "template <" + self.template.name + ">\n" result += "StatusCode " + self.capitalized_name() + "(" - result += (",\n" + indent).join([a for a in self.arguments_def(self.template)]) - result += ",\n" + indent + "cl_command_queue* queue, cl_event* event" + default_event + ")" + result += (",\n" + indent).join([a for a in arguments]) + result += ",\n" + indent + if cuda: + result += "const CUcontext context, const CUdevice device" + else: + result += "cl_command_queue* queue, cl_event* event" + default_event + result += ")" return result - def routine_header_type_cpp(self, spaces): + def routine_header_type_cpp(self, spaces, cuda=False): """As above, but now without variable names""" indent = " " * (spaces + self.length()) + arguments = self.arguments_type(self.template) + if cuda: + arguments = [a.replace("cl_mem", "CUdeviceptr") for a in arguments] result = "template <" + self.template.name + ">\n" result += "StatusCode " + self.capitalized_name() + "(" - result += (",\n" + indent).join([a for a in self.arguments_type(self.template)]) - result += ",\n" + indent + "cl_command_queue*, cl_event*)" + result += (",\n" + indent).join([a for a in arguments]) + result += ",\n" + indent + if cuda: + result += "const CUcontext, const CUdevice" + else: + result += "cl_command_queue*, cl_event*" + result += ")" return result def routine_header_c(self, flavour, spaces, extra_qualifier): |