From 032e3b0cc00a15dd2af8b4fb82d261eb7b086e26 Mon Sep 17 00:00:00 2001
From: Koichi Akabe <vbkaisetsu@gmail.com>
Date: Mon, 12 Nov 2018 10:12:07 +0900
Subject: Add kernel_mode option to im2col, col2im, and convgemm functions

---
 scripts/generator/generator.py         | 8 ++++----
 scripts/generator/generator/convert.py | 2 ++
 2 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'scripts/generator')

diff --git a/scripts/generator/generator.py b/scripts/generator/generator.py
index f8022d81..68e3f01a 100755
--- a/scripts/generator/generator.py
+++ b/scripts/generator/generator.py
@@ -49,7 +49,7 @@ FILES = [
     "/src/clblast_cuda.cpp",
     "/src/pyclblast/src/pyclblast.pyx"
 ]
-HEADER_LINES = [123, 21, 127, 24, 29, 45, 29, 65, 40, 95, 21, 290]
+HEADER_LINES = [124, 21, 128, 24, 29, 45, 29, 66, 40, 96, 21, 290]
 FOOTER_LINES = [98, 57, 112, 275, 6, 6, 6, 9, 2, 41, 56, 37]
 HEADER_LINES_DOC = 0
 FOOTER_LINES_DOC = 232
@@ -180,9 +180,9 @@ ROUTINES = [
   # Special routines:
   Routine(True,  True,  0, False, "x", "had",      T, [S,D,C,Z,H],   ["n"],                [],                                                    ["x","y"],  ["z"],                        [xn,yn,zn],      ["alpha","beta"], "",    "Element-wise vector product (Hadamard)", "Performs the Hadamard element-wise product _z = alpha * x * y + beta * z_, in which _x_, _y_, and _z_ are vectors and _alpha_ and _beta_ are scalar constants.", []),
   Routine(True,  True,  0, False, "x", "omatcopy", T, [S,D,C,Z,H],   ["m","n"],            ["layout","a_transpose"],                              ["a"],      ["b"],                        [amn,bnma],      ["alpha"],        "",    "Scaling and out-place transpose/copy (non-BLAS function)", "Performs scaling and out-of-place transposition/copying of matrices according to _B = alpha*op(A)_, in which _A_ is an input matrix (_m_ rows by _n_ columns), _B_ an output matrix, and _alpha_ a scalar value. The operation _op_ can be a normal matrix copy, a transposition or a conjugate transposition.", [ald_m, bld_n]),
-  Routine(True,  True,  0, False, "x", "im2col",   T, [S,D,C,Z,H],   im2col_constants,     [],                                                    ["im"],     ["col"],                      [im,col],        [""],             "",    "Im2col function (non-BLAS function)", "Performs the im2col algorithm, in which _im_ is the input matrix and _col_ is the output matrix. Overwrites any existing values in the _col_ buffer", []),
-  Routine(True,  True,  0, False, "x", "col2im",   T, [S,D,C,Z,H],   im2col_constants,     [],                                                    ["col"],    ["im"],                       [col,im],        [""],             "",    "Col2im function (non-BLAS function)", "Performs the col2im algorithm, in which _col_ is the input matrix and _im_ is the output matrix. Accumulates results on top of the existing values in the _im_ buffer.", []),
-  Routine(True,  True,  0, False, "x", "convgemm", T, [S,D,H],       convgemm_constants,   [],                                                    ["im","kernel"], ["result"],              [imb,kernel,result],[""],          "",    "Batched convolution as GEMM (non-BLAS function)", "Integrates im2col and GEMM for batched 3D convolution, in which _im_ is the 4D input tensor (NCHW - batch-channelin-height-width), _kernel_ the 4D kernel weights tensor (KCHW - channelout-channelin-height-width), and _result_ the 4D output tensor (NCHW - batch-channelout-height-width).", []),
+  Routine(True,  True,  0, False, "x", "im2col",   T, [S,D,C,Z,H],   im2col_constants,     ["kernel_mode"],                                       ["im"],     ["col"],                      [im,col],        [""],             "",    "Im2col function (non-BLAS function)", "Performs the im2col algorithm, in which _im_ is the input matrix and _col_ is the output matrix. Overwrites any existing values in the _col_ buffer", []),
+  Routine(True,  True,  0, False, "x", "col2im",   T, [S,D,C,Z,H],   im2col_constants,     ["kernel_mode"],                                       ["col"],    ["im"],                       [col,im],        [""],             "",    "Col2im function (non-BLAS function)", "Performs the col2im algorithm, in which _col_ is the input matrix and _im_ is the output matrix. Accumulates results on top of the existing values in the _im_ buffer.", []),
+  Routine(True,  True,  0, False, "x", "convgemm", T, [S,D,H],       convgemm_constants,   ["kernel_mode"],                                       ["im","kernel"], ["result"],              [imb,kernel,result],[""],          "",    "Batched convolution as GEMM (non-BLAS function)", "Integrates im2col and GEMM for batched 3D convolution, in which _im_ is the 4D input tensor (NCHW - batch-channelin-height-width), _kernel_ the 4D kernel weights tensor (KCHW - channelout-channelin-height-width), and _result_ the 4D output tensor (NCHW - batch-channelout-height-width).", []),
   # Batched routines:
   Routine(True,  True,  1, False, "x", "axpy",     T, [S,D,C,Z,H],   ["n"],                [],                                                    ["x"],      ["y"],                        [xn,yn],         ["alpha"],        "",    "Batched version of AXPY", "As AXPY, but multiple operations are batched together for better performance.", []),
   Routine(True,  True,  1, False, "x", "gemm",     T, [S,D,C,Z,H],   ["m","n","k"],        ["layout","a_transpose","b_transpose"],                ["a","b"],  ["c"],                        [amk,bkn,cmn],   ["alpha","beta"], "",    "Batched version of GEMM", "As GEMM, but multiple operations are batched together for better performance.", [ald_transa_m_k, bld_transb_k_n, cld_m]),
diff --git a/scripts/generator/generator/convert.py b/scripts/generator/generator/convert.py
index 07f45669..16890d27 100644
--- a/scripts/generator/generator/convert.py
+++ b/scripts/generator/generator/convert.py
@@ -27,6 +27,7 @@ def option_to_clblast(x):
         'side': "Side",
         'triangle': "Triangle",
         'diagonal': "Diagonal",
+        'kernel_mode': "KernelMode",
     }[x]
 
 
@@ -79,4 +80,5 @@ def option_to_documentation(x):
         'side': "The position of the triangular matrix in the operation, either on the `Side::kLeft` (141) or `Side::kRight` (142).",
         'triangle': "The part of the array of the triangular matrix to be used, either `Triangle::kUpper` (121) or `Triangle::kLower` (122).",
         'diagonal': "The property of the diagonal matrix, either `Diagonal::kNonUnit` (131) for non-unit values on the diagonal or `Diagonal::kUnit` (132) for unit values on the diagonal.",
+        'kernel_mode': "The kernel mode, either `KernelMode::kCrossCorrelation` for the normal mode, or `KernelMode::kConvolution` for the convolution mode that flips a kernel along `h` and `w` axes.",
     }[x]
-- 
cgit v1.2.3