summaryrefslogtreecommitdiff
path: root/scripts/generator
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/generator')
-rwxr-xr-xscripts/generator/generator.py8
-rw-r--r--scripts/generator/generator/cpp.py20
-rw-r--r--scripts/generator/generator/routine.py15
3 files changed, 28 insertions, 15 deletions
diff --git a/scripts/generator/generator.py b/scripts/generator/generator.py
index 875b1a55..3d66960f 100755
--- a/scripts/generator/generator.py
+++ b/scripts/generator/generator.py
@@ -59,13 +59,13 @@ ald_m = "The value of `a_ld` must be at least `m`."
ald_n = "The value of `a_ld` must be at least `n`."
ald_k_one = "The value of `a_ld` must be at least `k + 1`."
ald_kl_ku_one = "The value of `a_ld` must be at least `kl + ku + 1`."
-ald_transa_m_k = "When `transpose_a == Transpose::kNo`, then `a_ld` must be at least `m`, otherwise `a_ld` must be at least `k`."
-ald_trans_n_k = "When `transpose == Transpose::kNo`, then `a_ld` must be at least `n`, otherwise `a_ld` must be at least `k`."
+ald_transa_m_k = "When `(transpose_a == Transpose::kNo && layout == Layout::kColMajor) || (transpose_a == Transpose::kYes && layout == Layout::kRowMajor)`, then `a_ld` must be at least `m`, otherwise `a_ld` must be at least `k`."
+ald_trans_n_k = "When `(transpose == Transpose::kNo && layout == Layout::kColMajor) || (transpose == Transpose::kYes && layout == Layout::kRowMajor)`, then `a_ld` must be at least `n`, otherwise `a_ld` must be at least `k`."
ald_side_m_n = "When `side = Side::kLeft` then `a_ld` must be at least `m`, otherwise `a_ld` must be at least `n`."
bld_m = "The value of `b_ld` must be at least `m`."
bld_n = "The value of `b_ld` must be at least `n`."
-bld_transb_k_n = "When `transpose_b == Transpose::kNo`, then `b_ld` must be at least `k`, otherwise `b_ld` must be at least `n`."
-bld_trans_n_k = "When `transpose == Transpose::kNo`, then `b_ld` must be at least `n`, otherwise `b_ld` must be at least `k`."
+bld_transb_k_n = "When `(transpose_b == Transpose::kNo && layout == Layout::kColMajor) || (transpose_b == Transpose::kYes && layout == Layout::kRowMajor)`, then `b_ld` must be at least `k`, otherwise `b_ld` must be at least `n`."
+bld_trans_n_k = "When `(transpose == Transpose::kNo && layout == Layout::kColMajor) || (transpose == Transpose::kYes && layout == Layout::kRowMajor)`, then `b_ld` must be at least `n`, otherwise `b_ld` must be at least `k`."
cld_m = "The value of `c_ld` must be at least `m`."
cld_n = "The value of `c_ld` must be at least `n`."
diff --git a/scripts/generator/generator/cpp.py b/scripts/generator/generator/cpp.py
index 6dc3fc93..e32738ee 100644
--- a/scripts/generator/generator/cpp.py
+++ b/scripts/generator/generator/cpp.py
@@ -226,7 +226,10 @@ def wrapper_clblas(routine):
# Convert to float (note: also integer buffers are stored as half/float)
for buf in routine.inputs + routine.outputs:
- result += " auto " + buf + "_buffer_bis = HalfToFloatBuffer(" + buf + "_buffer, queues[0]);" + NL
+ if buf not in routine.index_buffers():
+ result += " auto " + buf + "_buffer_bis = HalfToFloatBuffer(" + buf + "_buffer, queues[0]);" + NL
+ else:
+ result += " auto " + buf + "_buffer_bis = " + buf + "_buffer;" + NL
# Call the float routine
result += " auto status = clblasX" + routine.name + "("
@@ -236,7 +239,8 @@ def wrapper_clblas(routine):
# Convert back to half
for buf in routine.outputs:
- result += " FloatToHalfBuffer(" + buf + "_buffer, " + buf + "_buffer_bis, queues[0]);" + NL
+ if buf not in routine.index_buffers():
+ result += " FloatToHalfBuffer(" + buf + "_buffer, " + buf + "_buffer_bis, queues[0]);" + NL
result += " return status;"
# Complete
@@ -276,10 +280,6 @@ def wrapper_cblas(routine):
extra_argument += "," + NL + indent
extra_argument += "reinterpret_cast<return_pointer_" + flavour.buffer_type[:-1] + ">"
extra_argument += "(&" + output_buffer + "_buffer[" + output_buffer + "_offset])"
- elif output_buffer in routine.index_buffers():
- assignment = "reinterpret_cast<int*>(&" + output_buffer + "_buffer[0])[" + output_buffer + "_offset] = static_cast<int>("
- postpostfix = ")"
- indent += " " * (len(assignment) + 1)
else:
assignment = output_buffer + "_buffer[" + output_buffer + "_offset]"
if flavour.name in ["Sc", "Dz"]:
@@ -299,7 +299,10 @@ def wrapper_cblas(routine):
# Convert to float (note: also integer buffers are stored as half/float)
for buf in routine.inputs + routine.outputs:
- result += " auto " + buf + "_buffer_bis = HalfToFloatBuffer(" + buf + "_buffer);" + NL
+ if buf not in routine.index_buffers():
+ result += " auto " + buf + "_buffer_bis = HalfToFloatBuffer(" + buf + "_buffer);" + NL
+ else:
+ result += " auto " + buf + "_buffer_bis = " + buf + "_buffer;" + NL
# Call the float routine
result += " cblasX" + routine.name + "("
@@ -308,7 +311,8 @@ def wrapper_cblas(routine):
# Convert back to half
for buf in routine.outputs:
- result += " FloatToHalfBuffer(" + buf + "_buffer, " + buf + "_buffer_bis);" + NL
+ if buf not in routine.index_buffers():
+ result += " FloatToHalfBuffer(" + buf + "_buffer, " + buf + "_buffer_bis);" + NL
# Complete
result += "}" + NL
diff --git a/scripts/generator/generator/routine.py b/scripts/generator/generator/routine.py
index 8b6ab57f..c2201c0d 100644
--- a/scripts/generator/generator/routine.py
+++ b/scripts/generator/generator/routine.py
@@ -282,7 +282,10 @@ class Routine:
"""As above but for OpenCL"""
prefix = "const " if name in self.inputs else ""
if name in self.inputs or name in self.outputs:
- a = [prefix + "Buffer<" + flavour.buffer_type + ">& " + name + "_buffer"]
+ if name == "imax":
+ a = [prefix + "Buffer<unsigned int>& " + name + "_buffer"]
+ else:
+ a = [prefix + "Buffer<" + flavour.buffer_type + ">& " + name + "_buffer"]
b = ["const size_t " + name + "_offset"]
c = ["const size_t " + name + "_" + self.postfix(name)] if name not in self.buffers_without_ld_inc() else []
return [", ".join(a + b + c)]
@@ -292,7 +295,10 @@ class Routine:
"""As above but for CUDA"""
prefix = "const " if name in self.inputs else ""
if name in self.inputs or name in self.outputs:
- a = [prefix + flavour.buffer_type + "* " + name + "_buffer"]
+ if name == "imax":
+ a = [prefix + "unsigned int * " + name + "_buffer"]
+ else:
+ a = [prefix + flavour.buffer_type + "* " + name + "_buffer"]
b = ["const size_t " + name + "_offset"]
c = ["const size_t " + name + "_" + self.postfix(name)] if name not in self.buffers_without_ld_inc() else []
return [", ".join(a + b + c)]
@@ -302,7 +308,10 @@ class Routine:
"""As above but as vectors"""
prefix = "const " if name in self.inputs else ""
if name in self.inputs or name in self.outputs:
- a = [prefix + "std::vector<" + flavour.buffer_type + ">& " + name + "_buffer"]
+ if name == "imax":
+ a = [prefix + "std::vector<unsigned int>& " + name + "_buffer"]
+ else:
+ a = [prefix + "std::vector<" + flavour.buffer_type + ">& " + name + "_buffer"]
b = ["const size_t " + name + "_offset"]
c = ["const size_t " + name + "_" + self.postfix(name)] if name not in self.buffers_without_ld_inc() else []
return [", ".join(a + b + c)]