summary refs log tree commit diff
path: root/scripts
diff options
context:
space:
mode:
author Cedric Nugteren <web@cedricnugteren.nl> 2018-05-05 14:06:33 +0200
committer Cedric Nugteren <web@cedricnugteren.nl> 2018-05-05 14:06:33 +0200
commit 2776d761768295b01a8be7c333dbb337805d7f77 (patch)
tree 3fb955ea8e6d962e60dc6c8eba4b5e9800c3db91 /scripts
parent 8b381480f8bd3b40cc030b07a599da10092b8117 (diff)
Added interface of batched convolution as GEMM
Diffstat (limited to 'scripts')
-rwxr-xr-x scripts/generator/generator.py 12
-rw-r--r-- scripts/generator/generator/routine.py 15
2 files changed, 19 insertions, 8 deletions
diff --git a/scripts/generator/generator.py b/scripts/generator/generator.py
index 32420962..e2837dd5 100755
--- a/scripts/generator/generator.py
+++ b/scripts/generator/generator.py
@@ -50,7 +50,7 @@ FILES = [
"/src/pyclblast/src/pyclblast.pyx"
]
HEADER_LINES = [123, 21, 127, 24, 29, 41, 29, 65, 32, 95, 21, 290]
-FOOTER_LINES = [98, 56, 112, 275, 6, 6, 6, 9, 2, 41, 55, 37]
+FOOTER_LINES = [98, 57, 112, 275, 6, 6, 6, 9, 2, 41, 56, 37]
HEADER_LINES_DOC = 0
FOOTER_LINES_DOC = 232
@@ -106,11 +106,16 @@ ammn = size_helper("layout == CLBlastLayoutRowMajor", "m", "((side == CLBlastSid
bmnn = size_helper("layout == CLBlastLayoutRowMajor", "((side == CLBlastSideLeft) ? m : n)", "n", "b_ld")
im = "height * width * channels"
col = "height * width * channels"
+imb = "height * width * channels * batch_count"
+kernel = "kernel_h * kernel_w * num_kernels"
+result = "height_out * width_out * num_kernels * batch_count"
+
# ==================================================================================================
# Populates a list of routines
im2col_constants = ["channels", "height", "width", "kernel_h", "kernel_w", "pad_h", "pad_w", "stride_h", "stride_w", "dilation_h", "dilation_w"]
+convgemm_constants = im2col_constants + ["num_kernels", "batch_count"]
ROUTINES = [
[ # Level 1: vector-vector
Routine(False, True, 0, False, "1", "rotg", T, [S,D], [], [], [], ["sa","sb","sc","ss"], ["1","1","1","1"], [], "", "Generate givens plane rotation", "", []),
@@ -176,6 +181,7 @@ ROUTINES = [
Routine(True, True, 0, False, "x", "had", T, [S,D,C,Z,H], ["n"], [], ["x","y"], ["z"], [xn,yn,zn], ["alpha","beta"], "", "Element-wise vector product (Hadamard)", "Performs the Hadamard element-wise product _z = alpha * x * y + beta * z_, in which _x_, _y_, and _z_ are vectors and _alpha_ and _beta_ are scalar constants.", []),
Routine(True, True, 0, False, "x", "omatcopy", T, [S,D,C,Z,H], ["m","n"], ["layout","a_transpose"], ["a"], ["b"], [amn,bnma], ["alpha"], "", "Scaling and out-place transpose/copy (non-BLAS function)", "Performs scaling and out-of-place transposition/copying of matrices according to _B = alpha*op(A)_, in which _A_ is an input matrix (_m_ rows by _n_ columns), _B_ an output matrix, and _alpha_ a scalar value. The operation _op_ can be a normal matrix copy, a transposition or a conjugate transposition.", [ald_m, bld_n]),
Routine(True, True, 0, False, "x", "im2col", T, [S,D,C,Z,H], im2col_constants, [], ["im"], ["col"], [im,col], [""], "", "Im2col function (non-BLAS function)", "Performs the im2col algorithm, in which _im_ is the input matrix and _col_ is the output matrix.", []),
+ Routine(False, True, 0, False, "x", "convgemm", T, [S,D,C,Z,H], convgemm_constants, [], ["im","kernel"], ["result"], [imb,kernel,result],[""], "", "Batched convolution as GEMM (non-BLAS function)", "Integrates im2col and GEMM for batched convolution, in which _im_ is the 4D input tensor, _kernel_ the 3D kernel weights tensor, and _result_ the 4D output tensor.", []),
# Batched routines:
Routine(True, True, 1, False, "x", "axpy", T, [S,D,C,Z,H], ["n"], [], ["x"], ["y"], [xn,yn], ["alpha"], "", "Batched version of AXPY", "As AXPY, but multiple operations are batched together for better performance.", []),
Routine(True, True, 1, False, "x", "gemm", T, [S,D,C,Z,H], ["m","n","k"], ["layout","a_transpose","b_transpose"], ["a","b"], ["c"], [amk,bkn,cmn], ["alpha","beta"], "", "Batched version of GEMM", "As GEMM, but multiple operations are batched together for better performance.", [ald_transa_m_k, bld_transb_k_n, cld_m]),
@@ -230,10 +236,10 @@ def main(argv):
if i == 6:
body += cpp.wrapper_cublas(routine)
if i == 7:
- if routine.batched == 0:
+ if routine.batched == 0 and routine.name not in ["convgemm"]:
body += cpp.clblast_netlib_c_h(routine)
if i == 8:
- if routine.batched == 0:
+ if routine.batched == 0 and routine.name not in ["convgemm"]:
body += cpp.clblast_netlib_c_cc(routine)
if i == 9:
body += cpp.clblast_h(routine, cuda=True)
diff --git a/scripts/generator/generator/routine.py b/scripts/generator/generator/routine.py
index 317c8e7b..7321349d 100644
--- a/scripts/generator/generator/routine.py
+++ b/scripts/generator/generator/routine.py
@@ -142,6 +142,11 @@ class Routine:
return ["a", "b", "c", "ap"]
@staticmethod
+ def buffers_tensor():
+ """Distinguish between vectors and matrices and tensors"""
+ return ["im", "col", "kernel", "result"]
+
+ @staticmethod
def routines_scalar_no_return():
return ["dotu", "dotc"]
@@ -187,7 +192,7 @@ class Routine:
def buffers_without_ld_inc(self):
"""List of buffers without 'inc' or 'ld'"""
- return self.scalar_buffers_first() + self.scalar_buffers_second() + ["ap", "im", "col"]
+ return self.scalar_buffers_first() + self.scalar_buffers_second() + ["ap", "im", "col", "kernel", "result"]
def get_buffer_type(self, name, flavour):
if name in self.index_buffers():
@@ -200,7 +205,7 @@ class Routine:
def no_scalars(self):
"""Determines whether or not this routine has scalar arguments (alpha/beta)"""
- return self.scalars == [] or self.name == "im2col"
+ return self.scalars == [] or self.name in ["im2col", "convgemm"]
def has_layout(self):
"""Determines whether the layout is an argument"""
@@ -221,12 +226,12 @@ class Routine:
"""Determines which buffers go first (between alpha and beta) and which ones go after"""
if self.level == "2b" or self.name == "had":
return ["x", "y"]
- return ["ap", "a", "b", "x", "im"]
+ return ["ap", "a", "b", "x", "im", "kernel"]
def buffers_second(self):
if self.level == "2b" or self.name == "had":
return ["z", "ap", "a", "b", "c"]
- return ["y", "c", "col"]
+ return ["y", "c", "col", "result"]
def buffer(self, name):
"""Retrieves a variable name for a specific input/output vector/matrix (e.g. 'x')"""
@@ -397,7 +402,7 @@ class Routine:
prefix = "const " if (name in self.inputs) else ""
inout = "input" if (name in self.inputs) else "output"
if (name in self.inputs) or (name in self.outputs):
- math_name = name.upper() + " matrix" if (name in self.buffers_matrix()) else name + " vector"
+ math_name = name.upper() + " matrix" if (name in self.buffers_matrix()) else name + " tensor" if (name in self.buffers_tensor()) else name + " vector"
inc_ld_description = "Leading dimension " if (name in self.buffers_matrix()) else "Stride/increment "
a = ["`" + prefix + "cl_mem " + name + "_buffer`: OpenCL buffer to store the " + inout + " " + math_name + "."]
b = ["`const size_t " + self.b_star() + name + "_offset" + self.b_s() + "`: The offset" + self.b_s() + " in elements from the start of the " + inout + " " + math_name + "."]