summaryrefslogtreecommitdiff
path: root/scripts
diff options
context:
space:
mode:
author: Cedric Nugteren <web@cedricnugteren.nl> 2023-05-07 20:02:52 +0200
committer: GitHub <noreply@github.com> 2023-05-07 20:02:52 +0200
commit: 3d0c227fa5004067d857c74f7963876b34ed4170 (patch)
tree: 3ff7c3766f1c2f13717ecc4a819da0815fa7cdb3 /scripts
parent: 1573f7d3040ddb6005e71bf4f770566f627236d2 (diff)
AMAX/AMIN integer testing and bug fixes (#457)
* Fixed a bug in XAMAX/XAMIN routines that caused the increment and offset to be included in the result
* Perform proper integer-output testing in XAMAX tests
* A few changes towards getting it ready for a PR
* Also fix compilation for clBLAS and cuBLAS references
* Fix a bug that would only use the real part of complex numbers in the amax/amin routines
* A few small fixes related to the AMAX tests
Diffstat (limited to 'scripts')
-rw-r--r-- scripts/generator/generator/cpp.py 20
-rw-r--r-- scripts/generator/generator/routine.py 15
2 files changed, 24 insertions, 11 deletions
diff --git a/scripts/generator/generator/cpp.py b/scripts/generator/generator/cpp.py
index 6dc3fc93..e32738ee 100644
--- a/scripts/generator/generator/cpp.py
+++ b/scripts/generator/generator/cpp.py
@@ -226,7 +226,10 @@ def wrapper_clblas(routine):
# Convert to float (note: also integer buffers are stored as half/float)
for buf in routine.inputs + routine.outputs:
- result += " auto " + buf + "_buffer_bis = HalfToFloatBuffer(" + buf + "_buffer, queues[0]);" + NL
+ if buf not in routine.index_buffers():
+ result += " auto " + buf + "_buffer_bis = HalfToFloatBuffer(" + buf + "_buffer, queues[0]);" + NL
+ else:
+ result += " auto " + buf + "_buffer_bis = " + buf + "_buffer;" + NL
# Call the float routine
result += " auto status = clblasX" + routine.name + "("
@@ -236,7 +239,8 @@ def wrapper_clblas(routine):
# Convert back to half
for buf in routine.outputs:
- result += " FloatToHalfBuffer(" + buf + "_buffer, " + buf + "_buffer_bis, queues[0]);" + NL
+ if buf not in routine.index_buffers():
+ result += " FloatToHalfBuffer(" + buf + "_buffer, " + buf + "_buffer_bis, queues[0]);" + NL
result += " return status;"
# Complete
@@ -276,10 +280,6 @@ def wrapper_cblas(routine):
extra_argument += "," + NL + indent
extra_argument += "reinterpret_cast<return_pointer_" + flavour.buffer_type[:-1] + ">"
extra_argument += "(&" + output_buffer + "_buffer[" + output_buffer + "_offset])"
- elif output_buffer in routine.index_buffers():
- assignment = "reinterpret_cast<int*>(&" + output_buffer + "_buffer[0])[" + output_buffer + "_offset] = static_cast<int>("
- postpostfix = ")"
- indent += " " * (len(assignment) + 1)
else:
assignment = output_buffer + "_buffer[" + output_buffer + "_offset]"
if flavour.name in ["Sc", "Dz"]:
@@ -299,7 +299,10 @@ def wrapper_cblas(routine):
# Convert to float (note: also integer buffers are stored as half/float)
for buf in routine.inputs + routine.outputs:
- result += " auto " + buf + "_buffer_bis = HalfToFloatBuffer(" + buf + "_buffer);" + NL
+ if buf not in routine.index_buffers():
+ result += " auto " + buf + "_buffer_bis = HalfToFloatBuffer(" + buf + "_buffer);" + NL
+ else:
+ result += " auto " + buf + "_buffer_bis = " + buf + "_buffer;" + NL
# Call the float routine
result += " cblasX" + routine.name + "("
@@ -308,7 +311,8 @@ def wrapper_cblas(routine):
# Convert back to half
for buf in routine.outputs:
- result += " FloatToHalfBuffer(" + buf + "_buffer, " + buf + "_buffer_bis);" + NL
+ if buf not in routine.index_buffers():
+ result += " FloatToHalfBuffer(" + buf + "_buffer, " + buf + "_buffer_bis);" + NL
# Complete
result += "}" + NL
diff --git a/scripts/generator/generator/routine.py b/scripts/generator/generator/routine.py
index 8b6ab57f..c2201c0d 100644
--- a/scripts/generator/generator/routine.py
+++ b/scripts/generator/generator/routine.py
@@ -282,7 +282,10 @@ class Routine:
"""As above but for OpenCL"""
prefix = "const " if name in self.inputs else ""
if name in self.inputs or name in self.outputs:
- a = [prefix + "Buffer<" + flavour.buffer_type + ">& " + name + "_buffer"]
+ if name == "imax":
+ a = [prefix + "Buffer<unsigned int>& " + name + "_buffer"]
+ else:
+ a = [prefix + "Buffer<" + flavour.buffer_type + ">& " + name + "_buffer"]
b = ["const size_t " + name + "_offset"]
c = ["const size_t " + name + "_" + self.postfix(name)] if name not in self.buffers_without_ld_inc() else []
return [", ".join(a + b + c)]
@@ -292,7 +295,10 @@ class Routine:
"""As above but for CUDA"""
prefix = "const " if name in self.inputs else ""
if name in self.inputs or name in self.outputs:
- a = [prefix + flavour.buffer_type + "* " + name + "_buffer"]
+ if name == "imax":
+ a = [prefix + "unsigned int * " + name + "_buffer"]
+ else:
+ a = [prefix + flavour.buffer_type + "* " + name + "_buffer"]
b = ["const size_t " + name + "_offset"]
c = ["const size_t " + name + "_" + self.postfix(name)] if name not in self.buffers_without_ld_inc() else []
return [", ".join(a + b + c)]
@@ -302,7 +308,10 @@ class Routine:
"""As above but as vectors"""
prefix = "const " if name in self.inputs else ""
if name in self.inputs or name in self.outputs:
- a = [prefix + "std::vector<" + flavour.buffer_type + ">& " + name + "_buffer"]
+ if name == "imax":
+ a = [prefix + "std::vector<unsigned int>& " + name + "_buffer"]
+ else:
+ a = [prefix + "std::vector<" + flavour.buffer_type + ">& " + name + "_buffer"]
b = ["const size_t " + name + "_offset"]
c = ["const size_t " + name + "_" + self.postfix(name)] if name not in self.buffers_without_ld_inc() else []
return [", ".join(a + b + c)]