From 153ac06cf262d2680d0152933156b1d1e15b3f86 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Mon, 31 Dec 2018 13:19:58 +0100 Subject: Added the forgotten batch dimension to the tuner to get correct kernel executions --- src/tuning/kernels/xconvgemm.hpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/tuning/kernels/xconvgemm.hpp b/src/tuning/kernels/xconvgemm.hpp index 9ba70f5e..10dc8ba6 100644 --- a/src/tuning/kernels/xconvgemm.hpp +++ b/src/tuning/kernels/xconvgemm.hpp @@ -86,10 +86,10 @@ TunerSettings XConvGemmGetTunerSettings(const int, const Arguments<T> &args) { settings.outputs = {4}; // Sets the base thread configuration - settings.global_size = {num_patches, args.num_kernels}; + settings.global_size = {num_patches, args.num_kernels, args.batch_count}; settings.global_size_ref = settings.global_size; - settings.local_size = {1, 1}; - settings.local_size_ref = {8, 8}; + settings.local_size = {1, 1, 1}; + settings.local_size_ref = {8, 8, 1}; // Transforms the thread configuration based on the parameters settings.mul_local = {{"MDIMCD", "NDIMCD"}}; @@ -161,12 +161,12 @@ void XConvGemmSetArguments(const int, Kernel &kernel, const Arguments<T> &args, kernel.SetArgument(1, static_cast<int>(args.num_kernels)); kernel.SetArgument(2, static_cast<int>(patch_size)); kernel.SetArgument(3, buffers[3]()); // 3 == B matrix ==> kernel buffer - kernel.SetArgument(4, 0); // c_offset + kernel.SetArgument(4, 0); // kernel offset kernel.SetArgument(5, buffers[4]()); // 4 == C matrix ==> result buffer - kernel.SetArgument(6, 0); // c_offset + kernel.SetArgument(6, 0); // result offset kernel.SetArgument(7, static_cast<int>(result_stride)); kernel.SetArgument(8, buffers[2]()); // 2 == A matrix ==> image buffer - kernel.SetArgument(9, 0); // c_offset + kernel.SetArgument(9, 0); // image offset kernel.SetArgument(10, static_cast<int>(args.height)); kernel.SetArgument(11, static_cast<int>(args.width)); kernel.SetArgument(12, static_cast<int>(args.channels)); 
-- cgit v1.2.3