summary | refs | log | tree | commit | diff
path: root/scripts
diff options
context:
space:
mode:
author Cedric Nugteren <web@cedricnugteren.nl> 2018-10-23 20:52:25 +0200
committer Cedric Nugteren <web@cedricnugteren.nl> 2018-10-23 20:52:25 +0200
commit d45911b61dedafcbd74f65df263b4197697d6a81 (patch)
tree 1f5d30db2f1b77a8258c5d0b331581938574d17e /scripts
parent 44b630fc222c6e22446c20995411994b51bc2f21 (diff)
Added groundwork for col2im algorithm plus first non-working version of kernel and test
Diffstat (limited to 'scripts')
-rwxr-xr-x scripts/generator/generator.py 1
-rw-r--r-- scripts/generator/generator/routine.py 8
2 files changed, 6 insertions, 3 deletions
diff --git a/scripts/generator/generator.py b/scripts/generator/generator.py
index c2637037..27107739 100755
--- a/scripts/generator/generator.py
+++ b/scripts/generator/generator.py
@@ -181,6 +181,7 @@ ROUTINES = [
Routine(True, True, 0, False, "x", "had", T, [S,D,C,Z,H], ["n"], [], ["x","y"], ["z"], [xn,yn,zn], ["alpha","beta"], "", "Element-wise vector product (Hadamard)", "Performs the Hadamard element-wise product _z = alpha * x * y + beta * z_, in which _x_, _y_, and _z_ are vectors and _alpha_ and _beta_ are scalar constants.", []),
Routine(True, True, 0, False, "x", "omatcopy", T, [S,D,C,Z,H], ["m","n"], ["layout","a_transpose"], ["a"], ["b"], [amn,bnma], ["alpha"], "", "Scaling and out-place transpose/copy (non-BLAS function)", "Performs scaling and out-of-place transposition/copying of matrices according to _B = alpha*op(A)_, in which _A_ is an input matrix (_m_ rows by _n_ columns), _B_ an output matrix, and _alpha_ a scalar value. The operation _op_ can be a normal matrix copy, a transposition or a conjugate transposition.", [ald_m, bld_n]),
Routine(True, True, 0, False, "x", "im2col", T, [S,D,C,Z,H], im2col_constants, [], ["im"], ["col"], [im,col], [""], "", "Im2col function (non-BLAS function)", "Performs the im2col algorithm, in which _im_ is the input matrix and _col_ is the output matrix.", []),
+ Routine(True, True, 0, False, "x", "col2im", T, [S,D,C,Z,H], im2col_constants, [], ["col"], ["im"], [col,im], [""], "", "Col2im function (non-BLAS function)", "Performs the col2im algorithm, in which _col_ is the input matrix and _im_ is the output matrix.", []),
Routine(True, True, 0, False, "x", "convgemm", T, [S,D,H], convgemm_constants, [], ["im","kernel"], ["result"], [imb,kernel,result],[""], "", "Batched convolution as GEMM (non-BLAS function)", "Integrates im2col and GEMM for batched 3D convolution, in which _im_ is the 4D input tensor (NCHW - batch-channelin-height-width), _kernel_ the 4D kernel weights tensor (KCHW - channelout-channelin-height-width), and _result_ the 4D output tensor (NCHW - batch-channelout-height-width).", []),
# Batched routines:
Routine(True, True, 1, False, "x", "axpy", T, [S,D,C,Z,H], ["n"], [], ["x"], ["y"], [xn,yn], ["alpha"], "", "Batched version of AXPY", "As AXPY, but multiple operations are batched together for better performance.", []),
diff --git a/scripts/generator/generator/routine.py b/scripts/generator/generator/routine.py
index 7321349d..3b5a6b76 100644
--- a/scripts/generator/generator/routine.py
+++ b/scripts/generator/generator/routine.py
@@ -205,7 +205,7 @@ class Routine:
def no_scalars(self):
"""Determines whether or not this routine has scalar arguments (alpha/beta)"""
- return self.scalars == [] or self.name in ["im2col", "convgemm"]
+ return self.scalars == [] or self.name in ["im2col", "col2im", "convgemm"]
def has_layout(self):
"""Determines whether the layout is an argument"""
@@ -226,12 +226,14 @@ class Routine:
"""Determines which buffers go first (between alpha and beta) and which ones go after"""
if self.level == "2b" or self.name == "had":
return ["x", "y"]
- return ["ap", "a", "b", "x", "im", "kernel"]
+ extra_buffer = "col" if self.name == "col2im" else "im"
+ return ["ap", "a", "b", "x", extra_buffer, "kernel"]
def buffers_second(self):
if self.level == "2b" or self.name == "had":
return ["z", "ap", "a", "b", "c"]
- return ["y", "c", "col", "result"]
+ extra_buffer = "im" if self.name == "col2im" else "col"
+ return ["y", "c", extra_buffer, "result"]
def buffer(self, name):
"""Retrieves a variable name for a specific input/output vector/matrix (e.g. 'x')"""