diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2023-05-07 20:02:52 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-05-07 20:02:52 +0200 |
commit | 3d0c227fa5004067d857c74f7963876b34ed4170 (patch) | |
tree | 3ff7c3766f1c2f13717ecc4a819da0815fa7cdb3 /scripts | |
parent | 1573f7d3040ddb6005e71bf4f770566f627236d2 (diff) |
AMAX/AMIN integer testing and bug fixes (#457)
* Fixed a bug in XAMAX/XMIN routines that caused the increment and offset to be included in the result
* Perform proper integer-output testing in XAMAX tests
* A few changes towards getting it ready for a PR
* Also fix compilation for clBLAS and cuBLAS references
* Fix a bug that would only use the real part of complex numbers in the amax/amin routines
* A few small fixes related to the AMAX tests
Diffstat (limited to 'scripts')
-rw-r--r-- | scripts/generator/generator/cpp.py | 20 | ||||
-rw-r--r-- | scripts/generator/generator/routine.py | 15 |
2 files changed, 24 insertions, 11 deletions
```diff
diff --git a/scripts/generator/generator/cpp.py b/scripts/generator/generator/cpp.py
index 6dc3fc93..e32738ee 100644
--- a/scripts/generator/generator/cpp.py
+++ b/scripts/generator/generator/cpp.py
@@ -226,7 +226,10 @@ def wrapper_clblas(routine):
     # Convert to float (note: also integer buffers are stored as half/float)
     for buf in routine.inputs + routine.outputs:
-        result += " auto " + buf + "_buffer_bis = HalfToFloatBuffer(" + buf + "_buffer, queues[0]);" + NL
+        if buf not in routine.index_buffers():
+            result += " auto " + buf + "_buffer_bis = HalfToFloatBuffer(" + buf + "_buffer, queues[0]);" + NL
+        else:
+            result += " auto " + buf + "_buffer_bis = " + buf + "_buffer;" + NL
     # Call the float routine
     result += " auto status = clblasX" + routine.name + "("
@@ -236,7 +239,8 @@ def wrapper_clblas(routine):
     # Convert back to half
     for buf in routine.outputs:
-        result += " FloatToHalfBuffer(" + buf + "_buffer, " + buf + "_buffer_bis, queues[0]);" + NL
+        if buf not in routine.index_buffers():
+            result += " FloatToHalfBuffer(" + buf + "_buffer, " + buf + "_buffer_bis, queues[0]);" + NL
     result += " return status;"
     # Complete
@@ -276,10 +280,6 @@ def wrapper_cblas(routine):
         extra_argument += "," + NL + indent
         extra_argument += "reinterpret_cast<return_pointer_" + flavour.buffer_type[:-1] + ">"
         extra_argument += "(&" + output_buffer + "_buffer[" + output_buffer + "_offset])"
-    elif output_buffer in routine.index_buffers():
-        assignment = "reinterpret_cast<int*>(&" + output_buffer + "_buffer[0])[" + output_buffer + "_offset] = static_cast<int>("
-        postpostfix = ")"
-        indent += " " * (len(assignment) + 1)
     else:
         assignment = output_buffer + "_buffer[" + output_buffer + "_offset]"
         if flavour.name in ["Sc", "Dz"]:
@@ -299,7 +299,10 @@ def wrapper_cblas(routine):
     # Convert to float (note: also integer buffers are stored as half/float)
     for buf in routine.inputs + routine.outputs:
-        result += " auto " + buf + "_buffer_bis = HalfToFloatBuffer(" + buf + "_buffer);" + NL
+        if buf not in routine.index_buffers():
+            result += " auto " + buf + "_buffer_bis = HalfToFloatBuffer(" + buf + "_buffer);" + NL
+        else:
+            result += " auto " + buf + "_buffer_bis = " + buf + "_buffer;" + NL
     # Call the float routine
     result += " cblasX" + routine.name + "("
@@ -308,7 +311,8 @@ def wrapper_cblas(routine):
     # Convert back to half
     for buf in routine.outputs:
-        result += " FloatToHalfBuffer(" + buf + "_buffer, " + buf + "_buffer_bis);" + NL
+        if buf not in routine.index_buffers():
+            result += " FloatToHalfBuffer(" + buf + "_buffer, " + buf + "_buffer_bis);" + NL
     # Complete
     result += "}" + NL
diff --git a/scripts/generator/generator/routine.py b/scripts/generator/generator/routine.py
index 8b6ab57f..c2201c0d 100644
--- a/scripts/generator/generator/routine.py
+++ b/scripts/generator/generator/routine.py
@@ -282,7 +282,10 @@ class Routine:
         """As above but for OpenCL"""
         prefix = "const " if name in self.inputs else ""
         if name in self.inputs or name in self.outputs:
-            a = [prefix + "Buffer<" + flavour.buffer_type + ">& " + name + "_buffer"]
+            if name == "imax":
+                a = [prefix + "Buffer<unsigned int>& " + name + "_buffer"]
+            else:
+                a = [prefix + "Buffer<" + flavour.buffer_type + ">& " + name + "_buffer"]
             b = ["const size_t " + name + "_offset"]
             c = ["const size_t " + name + "_" + self.postfix(name)] if name not in self.buffers_without_ld_inc() else []
             return [", ".join(a + b + c)]
@@ -292,7 +295,10 @@ class Routine:
         """As above but for CUDA"""
         prefix = "const " if name in self.inputs else ""
         if name in self.inputs or name in self.outputs:
-            a = [prefix + flavour.buffer_type + "* " + name + "_buffer"]
+            if name == "imax":
+                a = [prefix + "unsigned int * " + name + "_buffer"]
+            else:
+                a = [prefix + flavour.buffer_type + "* " + name + "_buffer"]
             b = ["const size_t " + name + "_offset"]
             c = ["const size_t " + name + "_" + self.postfix(name)] if name not in self.buffers_without_ld_inc() else []
             return [", ".join(a + b + c)]
@@ -302,7 +308,10 @@ class Routine:
         """As above but as vectors"""
         prefix = "const " if name in self.inputs else ""
         if name in self.inputs or name in self.outputs:
-            a = [prefix + "std::vector<" + flavour.buffer_type + ">& " + name + "_buffer"]
+            if name == "imax":
+                a = [prefix + "std::vector<unsigned int>& " + name + "_buffer"]
+            else:
+                a = [prefix + "std::vector<" + flavour.buffer_type + ">& " + name + "_buffer"]
             b = ["const size_t " + name + "_offset"]
             c = ["const size_t " + name + "_" + self.postfix(name)] if name not in self.buffers_without_ld_inc() else []
             return [", ".join(a + b + c)]
```