Added convgemm skeleton, test infrastructure, and first reference implementation

author: Cedric Nugteren <web@cedricnugteren.nl> 2018-05-06 11:35:34 +0200
committer: Cedric Nugteren <web@cedricnugteren.nl> 2018-05-06 11:35:34 +0200
commit: 2d1f6ba7fe842ba938490fc599b6ebd209b6560b (patch)
tree: f1a284e5dc0163b7fed938a3efeb39432b9d3788 /scripts
parent: 2776d761768295b01a8be7c333dbb337805d7f77 (diff)
1 file changed, 1 insertion, 1 deletion
diff --git a/scripts/generator/generator.py b/scripts/generator/generator.py
index e2837dd5..f04d9f3d 100755
--- a/scripts/generator/generator.py
+++ b/scripts/generator/generator.py
@@ -181,7 +181,7 @@ ROUTINES = [
   Routine(True,  True,  0, False, "x", "had",      T, [S,D,C,Z,H],   ["n"],                [],                                                    ["x","y"],  ["z"],                        [xn,yn,zn],      ["alpha","beta"], "",    "Element-wise vector product (Hadamard)", "Performs the Hadamard element-wise product _z = alpha * x * y + beta * z_, in which _x_, _y_, and _z_ are vectors and _alpha_ and _beta_ are scalar constants.", []),
   Routine(True,  True,  0, False, "x", "omatcopy", T, [S,D,C,Z,H],   ["m","n"],            ["layout","a_transpose"],                              ["a"],      ["b"],                        [amn,bnma],      ["alpha"],        "",    "Scaling and out-place transpose/copy (non-BLAS function)", "Performs scaling and out-of-place transposition/copying of matrices according to _B = alpha*op(A)_, in which _A_ is an input matrix (_m_ rows by _n_ columns), _B_ an output matrix, and _alpha_ a scalar value. The operation _op_ can be a normal matrix copy, a transposition or a conjugate transposition.", [ald_m, bld_n]),
   Routine(True,  True,  0, False, "x", "im2col",   T, [S,D,C,Z,H],   im2col_constants,     [],                                                    ["im"],     ["col"],                      [im,col],        [""],             "",    "Im2col function (non-BLAS function)", "Performs the im2col algorithm, in which _im_ is the input matrix and _col_ is the output matrix.", []),
-  Routine(False, True,  0, False, "x", "convgemm", T, [S,D,C,Z,H],   convgemm_constants,   [],                                                    ["im","kernel"], ["result"],              [imb,kernel,result],[""],          "",    "Batched convolution as GEMM (non-BLAS function)", "Integrates im2col and GEMM for batched convolution, in which _im_ is the 4D input tensor, _kernel_ the 3D kernel weights tensor, and _result_ the 4D output tensor.", []),
+  Routine(True,  True,  0, False, "x", "convgemm", T, [S,D,C,Z,H],   convgemm_constants,   [],                                                    ["im","kernel"], ["result"],              [imb,kernel,result],[""],          "",    "Batched convolution as GEMM (non-BLAS function)", "Integrates im2col and GEMM for batched 3D convolution, in which _im_ is the 4D input tensor, _kernel_ the 4D kernel weights tensor, and _result_ the 4D output tensor.", []),
   # Batched routines:
   Routine(True,  True,  1, False, "x", "axpy",     T, [S,D,C,Z,H],   ["n"],                [],                                                    ["x"],      ["y"],                        [xn,yn],         ["alpha"],        "",    "Batched version of AXPY", "As AXPY, but multiple operations are batched together for better performance.", []),
   Routine(True,  True,  1, False, "x", "gemm",     T, [S,D,C,Z,H],   ["m","n","k"],        ["layout","a_transpose","b_transpose"],                ["a","b"],  ["c"],                        [amk,bkn,cmn],   ["alpha","beta"], "",    "Batched version of GEMM", "As GEMM, but multiple operations are batched together for better performance.", [ald_transa_m_k, bld_transb_k_n, cld_m]),
author	Cedric Nugteren <web@cedricnugteren.nl>	2018-05-06 11:35:34 +0200
committer	Cedric Nugteren <web@cedricnugteren.nl>	2018-05-06 11:35:34 +0200
commit	2d1f6ba7fe842ba938490fc599b6ebd209b6560b (patch)
tree	f1a284e5dc0163b7fed938a3efeb39432b9d3788 /scripts
parent	2776d761768295b01a8be7c333dbb337805d7f77 (diff)