summary | refs | log | tree | commit | diff
path: root/scripts
diff options
context:
space:
mode:
Diffstat (limited to 'scripts')
-rwxr-xr-x scripts/generator/generator.py 12
-rw-r--r-- scripts/generator/generator/cpp.py 47
-rw-r--r-- scripts/generator/generator/datatype.py 6
-rw-r--r-- scripts/generator/generator/routine.py 28
4 files changed, 68 insertions, 25 deletions
diff --git a/scripts/generator/generator.py b/scripts/generator/generator.py
index df0eaca0..520e3fc8 100755
--- a/scripts/generator/generator.py
+++ b/scripts/generator/generator.py
@@ -12,6 +12,8 @@
# clblast.cpp
# clblast_c.h
# clblast_c.cpp
+# clblast_cuda.h
+# clblast_cuda.cpp
# clblast_netlib_c.h
# clblast_netlib_c.cpp
# wrapper_clblas.h
@@ -41,9 +43,11 @@ FILES = [
"/test/wrapper_cublas.hpp",
"/include/clblast_netlib_c.h",
"/src/clblast_netlib_c.cpp",
+ "/include/clblast_cuda.h",
+ "/src/clblast_cuda.cpp",
]
-HEADER_LINES = [122, 79, 126, 24, 29, 41, 29, 65, 32]
-FOOTER_LINES = [25, 147, 27, 38, 6, 6, 6, 9, 2]
+HEADER_LINES = [122, 21, 126, 24, 29, 41, 29, 65, 32, 94, 21]
+FOOTER_LINES = [25, 3, 27, 38, 6, 6, 6, 9, 2, 25, 3]
HEADER_LINES_DOC = 0
FOOTER_LINES_DOC = 63
@@ -224,6 +228,10 @@ def main(argv):
if i == 8:
if not routine.batched:
body += cpp.clblast_netlib_c_cc(routine)
+ if i == 9:
+ body += cpp.clblast_h(routine, cuda=True)
+ if i == 10:
+ body += cpp.clblast_cc(routine, cuda=True)
f.write("".join(file_header))
f.write(body)
f.write("".join(file_footer))
diff --git a/scripts/generator/generator/cpp.py b/scripts/generator/generator/cpp.py
index 964b8f3e..2d18655f 100644
--- a/scripts/generator/generator/cpp.py
+++ b/scripts/generator/generator/cpp.py
@@ -36,22 +36,28 @@ HEADER = NL + SEPARATOR + """
""" + SEPARATOR + NL
-def clblast_h(routine):
+def clblast_h(routine, cuda=False):
"""The C++ API header (.h)"""
result = NL + "// " + routine.description + ": " + routine.short_names() + NL
- result += routine.routine_header_cpp(12, " = nullptr") + ";" + NL
+ result += routine.routine_header_cpp(12, " = nullptr", cuda) + ";" + NL
return result
-def clblast_cc(routine):
+def clblast_cc(routine, cuda=False):
"""The C++ API implementation (.cpp)"""
indent1 = " " * (15 + routine.length())
result = NL + "// " + routine.description + ": " + routine.short_names() + NL
if routine.implemented:
- result += routine.routine_header_cpp(12, "") + " {" + NL
+ result += routine.routine_header_cpp(12, "", cuda) + " {" + NL
result += " try {" + NL
- result += " auto queue_cpp = Queue(*queue);" + NL
- result += " auto routine = X" + routine.plain_name() + "<" + routine.template.template + ">(queue_cpp, event);" + NL
+ if cuda:
+ result += " const auto context_cpp = Context(context);" + NL
+ result += " const auto device_cpp = Device(device);" + NL
+ result += " auto queue_cpp = Queue(context_cpp, device_cpp);" + NL
+ else:
+ result += " auto queue_cpp = Queue(*queue);" + NL
+ event = "nullptr" if cuda else "event"
+ result += " auto routine = X" + routine.plain_name() + "<" + routine.template.template + ">(queue_cpp, " + event + ");" + NL
if routine.batched:
result += " " + (NL + " ").join(routine.batched_transform_to_cpp()) + NL
result += " routine.Do" + routine.capitalized_name() + "("
@@ -60,14 +66,22 @@ def clblast_cc(routine):
result += " return StatusCode::kSuccess;" + NL
result += " } catch (...) { return DispatchException(); }" + NL
else:
- result += routine.routine_header_type_cpp(12) + " {" + NL
+ result += routine.routine_header_type_cpp(12, cuda) + " {" + NL
result += " return StatusCode::kNotImplemented;" + NL
result += "}" + NL
for flavour in routine.flavours:
indent2 = " " * (34 + routine.length() + len(flavour.template))
result += "template StatusCode PUBLIC_API " + routine.capitalized_name() + "<" + flavour.template + ">("
- result += ("," + NL + indent2).join([a for a in routine.arguments_type(flavour)])
- result += "," + NL + indent2 + "cl_command_queue*, cl_event*);" + NL
+ arguments = routine.arguments_type(flavour)
+ if cuda:
+ arguments = [a.replace("cl_mem", "CUdeviceptr") for a in arguments]
+ result += ("," + NL + indent2).join([a for a in arguments])
+ result += "," + NL + indent2
+ if cuda:
+ result += "const CUcontext, const CUdevice"
+ else:
+ result += "cl_command_queue*, cl_event*"
+ result += ");" + NL
return result
@@ -364,7 +378,9 @@ def performance_test(routine, level_string):
found = False
for flavour in routine.flavours:
if flavour.precision_name == precision:
- result += NL + " clblast::RunClient<clblast::TestX" + routine.plain_name() + flavour.test_template()
+ extra_template_argument = "0, " if routine.name == "gemm" and not routine.batched else ""
+ result += NL + " clblast::RunClient<clblast::TestX" + routine.plain_name()
+ result += flavour.test_template(extra_template_argument)
result += ">(argc, argv); break;" + NL
found = True
if not found:
@@ -384,10 +400,13 @@ def correctness_test(routine, level_string):
result += "int main(int argc, char *argv[]) {" + NL
result += " auto errors = size_t{0};" + NL
not_first = "false"
- for flavour in routine.flavours:
- result += " errors += clblast::RunTests<clblast::TestX" + routine.plain_name() + flavour.test_template()
- result += ">(argc, argv, " + not_first + ", \"" + flavour.name + routine.upper_name() + "\");" + NL
- not_first = "true"
+ extra_template_arguments = ["1, ", "2, "] if routine.name == "gemm" and not routine.batched else [""]
+ for extra_template_argument in extra_template_arguments:
+ for flavour in routine.flavours:
+ result += " errors += clblast::RunTests<clblast::TestX" + routine.plain_name()
+ result += flavour.test_template(extra_template_argument)
+ result += ">(argc, argv, " + not_first + ", \"" + flavour.name + routine.upper_name() + "\");" + NL
+ not_first = "true"
result += " if (errors > 0) { return 1; } else { return 0; }" + NL
result += "}" + NL
return result
diff --git a/scripts/generator/generator/datatype.py b/scripts/generator/generator/datatype.py
index fdb584bc..f2b1c9e3 100644
--- a/scripts/generator/generator/datatype.py
+++ b/scripts/generator/generator/datatype.py
@@ -70,13 +70,13 @@ class DataType:
return self.beta_cpp + "{reinterpret_cast<const double*>(beta)[0], reinterpret_cast<const double*>(beta)[1]}"
return "beta"
- def test_template(self):
+ def test_template(self, extra_template_argument):
"""Returns the template as used in the correctness/performance tests"""
buffer_type = "clblast::" + self.buffer_type if self.is_non_standard() else self.buffer_type
beta_cpp = "clblast::" + self.beta_cpp if self.beta_cpp in [D_HALF, D_FLOAT2, D_DOUBLE2] else self.beta_cpp
if self.buffer_type != self.beta_cpp:
- return "<" + buffer_type + "," + self.beta_cpp + ">, " + buffer_type + ", " + beta_cpp
- return "<" + buffer_type + ">, " + buffer_type + ", " + beta_cpp
+ return "<" + extra_template_argument + buffer_type + "," + self.beta_cpp + ">, " + buffer_type + ", " + beta_cpp
+ return "<" + extra_template_argument + buffer_type + ">, " + buffer_type + ", " + beta_cpp
def is_complex(self, scalar):
"""Current scalar is complex"""
diff --git a/scripts/generator/generator/routine.py b/scripts/generator/generator/routine.py
index cef7db87..b6b55821 100644
--- a/scripts/generator/generator/routine.py
+++ b/scripts/generator/generator/routine.py
@@ -802,22 +802,38 @@ class Routine:
"""Retrieves a list of routine requirements for documentation"""
return self.requirements
- def routine_header_cpp(self, spaces, default_event):
+ def routine_header_cpp(self, spaces, default_event, cuda=False):
"""Retrieves the C++ templated definition for a routine"""
indent = " " * (spaces + self.length())
+ arguments = self.arguments_def(self.template)
+ if cuda:
+ arguments = [a.replace("cl_mem", "CUdeviceptr") for a in arguments]
result = "template <" + self.template.name + ">\n"
result += "StatusCode " + self.capitalized_name() + "("
- result += (",\n" + indent).join([a for a in self.arguments_def(self.template)])
- result += ",\n" + indent + "cl_command_queue* queue, cl_event* event" + default_event + ")"
+ result += (",\n" + indent).join([a for a in arguments])
+ result += ",\n" + indent
+ if cuda:
+ result += "const CUcontext context, const CUdevice device"
+ else:
+ result += "cl_command_queue* queue, cl_event* event" + default_event
+ result += ")"
return result
- def routine_header_type_cpp(self, spaces):
+ def routine_header_type_cpp(self, spaces, cuda=False):
"""As above, but now without variable names"""
indent = " " * (spaces + self.length())
+ arguments = self.arguments_type(self.template)
+ if cuda:
+ arguments = [a.replace("cl_mem", "CUdeviceptr") for a in arguments]
result = "template <" + self.template.name + ">\n"
result += "StatusCode " + self.capitalized_name() + "("
- result += (",\n" + indent).join([a for a in self.arguments_type(self.template)])
- result += ",\n" + indent + "cl_command_queue*, cl_event*)"
+ result += (",\n" + indent).join([a for a in arguments])
+ result += ",\n" + indent
+ if cuda:
+ result += "const CUcontext, const CUdevice"
+ else:
+ result += "cl_command_queue*, cl_event*"
+ result += ")"
return result
def routine_header_c(self, flavour, spaces, extra_qualifier):