summaryrefslogtreecommitdiff
path: root/scripts/generator/generator/cpp.py
diff options
context:
space:
mode:
authorCedric Nugteren <web@cedricnugteren.nl>2023-05-07 20:02:52 +0200
committerGitHub <noreply@github.com>2023-05-07 20:02:52 +0200
commit3d0c227fa5004067d857c74f7963876b34ed4170 (patch)
tree3ff7c3766f1c2f13717ecc4a819da0815fa7cdb3 /scripts/generator/generator/cpp.py
parent1573f7d3040ddb6005e71bf4f770566f627236d2 (diff)
AMAX/AMIN integer testing and bug fixes (#457)
* Fixed a bug in XAMAX/XMIN routines that caused the increment and offset to be included in the result * Perform proper integer-output testing in XAMAX tests * A few changes towards getting it ready for a PR * Also fix compilation for clBLAS and cuBLAS references * Fix a bug that would only use the real part of complex numbers in the amax/amin routines * A few small fixes related to the AMAX tests
Diffstat (limited to 'scripts/generator/generator/cpp.py')
-rw-r--r--scripts/generator/generator/cpp.py20
1 files changed, 12 insertions, 8 deletions
diff --git a/scripts/generator/generator/cpp.py b/scripts/generator/generator/cpp.py
index 6dc3fc93..e32738ee 100644
--- a/scripts/generator/generator/cpp.py
+++ b/scripts/generator/generator/cpp.py
@@ -226,7 +226,10 @@ def wrapper_clblas(routine):
# Convert to float (note: also integer buffers are stored as half/float)
for buf in routine.inputs + routine.outputs:
- result += " auto " + buf + "_buffer_bis = HalfToFloatBuffer(" + buf + "_buffer, queues[0]);" + NL
+ if buf not in routine.index_buffers():
+ result += " auto " + buf + "_buffer_bis = HalfToFloatBuffer(" + buf + "_buffer, queues[0]);" + NL
+ else:
+ result += " auto " + buf + "_buffer_bis = " + buf + "_buffer;" + NL
# Call the float routine
result += " auto status = clblasX" + routine.name + "("
@@ -236,7 +239,8 @@ def wrapper_clblas(routine):
# Convert back to half
for buf in routine.outputs:
- result += " FloatToHalfBuffer(" + buf + "_buffer, " + buf + "_buffer_bis, queues[0]);" + NL
+ if buf not in routine.index_buffers():
+ result += " FloatToHalfBuffer(" + buf + "_buffer, " + buf + "_buffer_bis, queues[0]);" + NL
result += " return status;"
# Complete
@@ -276,10 +280,6 @@ def wrapper_cblas(routine):
extra_argument += "," + NL + indent
extra_argument += "reinterpret_cast<return_pointer_" + flavour.buffer_type[:-1] + ">"
extra_argument += "(&" + output_buffer + "_buffer[" + output_buffer + "_offset])"
- elif output_buffer in routine.index_buffers():
- assignment = "reinterpret_cast<int*>(&" + output_buffer + "_buffer[0])[" + output_buffer + "_offset] = static_cast<int>("
- postpostfix = ")"
- indent += " " * (len(assignment) + 1)
else:
assignment = output_buffer + "_buffer[" + output_buffer + "_offset]"
if flavour.name in ["Sc", "Dz"]:
@@ -299,7 +299,10 @@ def wrapper_cblas(routine):
# Convert to float (note: also integer buffers are stored as half/float)
for buf in routine.inputs + routine.outputs:
- result += " auto " + buf + "_buffer_bis = HalfToFloatBuffer(" + buf + "_buffer);" + NL
+ if buf not in routine.index_buffers():
+ result += " auto " + buf + "_buffer_bis = HalfToFloatBuffer(" + buf + "_buffer);" + NL
+ else:
+ result += " auto " + buf + "_buffer_bis = " + buf + "_buffer;" + NL
# Call the float routine
result += " cblasX" + routine.name + "("
@@ -308,7 +311,8 @@ def wrapper_cblas(routine):
# Convert back to half
for buf in routine.outputs:
- result += " FloatToHalfBuffer(" + buf + "_buffer, " + buf + "_buffer_bis);" + NL
+ if buf not in routine.index_buffers():
+ result += " FloatToHalfBuffer(" + buf + "_buffer, " + buf + "_buffer_bis);" + NL
# Complete
result += "}" + NL