summary | refs | log | tree | commit | diff
path: root/src/tuning/kernels
diff options
context:
space:
mode:
author: Koichi Akabe <vbkaisetsu@gmail.com>, 2018-12-18 14:05:25 +0900
committer: Koichi Akabe <vbkaisetsu@gmail.com>, 2018-12-18 14:05:25 +0900
commit: a8e6f813ddb7a8f608077d035583114fd2e763dd (patch)
tree: 22b29669d2f9d977aa5a597b05fde5baed662720 /src/tuning/kernels
parent: 1f0cd618247971f0803aff94ddf795d9e3d19428 (diff)
Fix the xconvgemm tuner
Diffstat (limited to 'src/tuning/kernels')
-rw-r--r-- src/tuning/kernels/xconvgemm.hpp | 8
1 files changed, 4 insertions, 4 deletions
diff --git a/src/tuning/kernels/xconvgemm.hpp b/src/tuning/kernels/xconvgemm.hpp
index 83c93860..9ba70f5e 100644
--- a/src/tuning/kernels/xconvgemm.hpp
+++ b/src/tuning/kernels/xconvgemm.hpp
@@ -44,8 +44,8 @@ TunerDefaults XConvGemmGetTunerDefaults(const int) {
settings.options = {kArgChannels, kArgHeight, kArgWidth, kArgKernelH, kArgKernelW,
kArgNumKernels, kArgBatchCount, kArgFraction};
settings.channels = 32;
- settings.height = 64;
- settings.width = 64;
+ settings.height = 66;
+ settings.width = 66; // num_patches = 64x64 = 4096
settings.kernel_h = 3;
settings.kernel_w = 3;
settings.num_kernels = 32;
@@ -62,7 +62,7 @@ TunerSettings XConvGemmGetTunerSettings(const int, const Arguments<T> &args) {
// Identification of the kernel
settings.kernel_family = "xconvgemm";
- settings.kernel_name = "Xconvgemm";
+ settings.kernel_name = "XconvgemmNormal";
settings.sources =
"#define ROUTINE_CONVGEMM"
#include "../src/kernels/level3/xgemm_direct_part1.opencl"
@@ -79,7 +79,7 @@ TunerSettings XConvGemmGetTunerSettings(const int, const Arguments<T> &args) {
// Buffer sizes
settings.size_a = args.batch_count * args.channels * args.height * args.width;
settings.size_b = args.num_kernels * args.channels * args.kernel_h * args.kernel_w;
- settings.size_a = args.batch_count * args.num_kernels * OutputHeight(args) * OutputWidth(args);
+ settings.size_c = args.batch_count * args.num_kernels * OutputHeight(args) * OutputWidth(args);
// Inputs and outputs IDs (X:0, Y:1, A:2, B:3, C:4, temp:5)
settings.inputs = {2, 3, 4};