diff options
Diffstat (limited to 'test')
-rw-r--r-- | test/correctness/misc/override_parameters.cpp | 1 | ||||
-rw-r--r-- | test/correctness/testblas.hpp | 38 | ||||
-rw-r--r-- | test/routines/levelx/xcol2im.hpp | 14 | ||||
-rw-r--r-- | test/routines/levelx/xconvgemm.hpp | 20 | ||||
-rw-r--r-- | test/routines/levelx/xim2col.hpp | 14 |
5 files changed, 58 insertions, 29 deletions
diff --git a/test/correctness/misc/override_parameters.cpp b/test/correctness/misc/override_parameters.cpp index 54229c5e..7ed4faff 100644 --- a/test/correctness/misc/override_parameters.cpp +++ b/test/correctness/misc/override_parameters.cpp @@ -60,6 +60,7 @@ size_t RunOverrideTests(int argc, char *argv[], const bool silent, const std::st args.layout = GetArgument(arguments, help, kArgLayout, Layout::kRowMajor); args.a_transpose = GetArgument(arguments, help, kArgATransp, Transpose::kNo); args.b_transpose = GetArgument(arguments, help, kArgBTransp, Transpose::kNo); + args.kernel_mode = GetArgument(arguments, help, kArgKernelMode, KernelMode::kCrossCorrelation); args.alpha = GetArgument(arguments, help, kArgAlpha, GetScalar<T>()); args.beta = GetArgument(arguments, help, kArgBeta, GetScalar<T>()); diff --git a/test/correctness/testblas.hpp b/test/correctness/testblas.hpp index 137df30f..b2dc6e7a 100644 --- a/test/correctness/testblas.hpp +++ b/test/correctness/testblas.hpp @@ -63,6 +63,7 @@ class TestBlas: public Tester<T,U> { static const std::vector<size_t> kNumKernels; static const std::vector<size_t> kStrideValues; static const std::vector<size_t> kChannelValues; + static const std::vector<KernelMode> kKernelModes; const std::vector<size_t> kOffsets; const std::vector<U> kAlphaValues; const std::vector<U> kBetaValues; @@ -142,6 +143,7 @@ template <typename T, typename U> const std::vector<size_t> TestBlas<T,U>::kKern template <typename T, typename U> const std::vector<size_t> TestBlas<T,U>::kNumKernels = { 1, 6 }; template <typename T, typename U> const std::vector<size_t> TestBlas<T,U>::kStrideValues = { 1, 3 }; template <typename T, typename U> const std::vector<size_t> TestBlas<T,U>::kChannelValues = { 1, 2 }; +template <typename T, typename U> const std::vector<KernelMode> TestBlas<T,U>::kKernelModes = { KernelMode::kCrossCorrelation, KernelMode::kConvolution }; // Test settings for the invalid tests template <typename T, typename U> const 
std::vector<size_t> TestBlas<T,U>::kInvalidIncrements = { 0, 1 }; @@ -168,6 +170,7 @@ static StatusCode ReferenceNotAvailable(const Arguments<U> &, BufferType &, Queu template <typename C, typename T, typename U> void handle_remaining_of_options(std::vector<Arguments<U>> &regular_test_vector, Arguments<U> &r_args, TestBlas<T,U> &tester, + const std::vector<KernelMode> &kernel_modes, const std::vector<size_t> &channelss, const std::vector<size_t> &heights, const std::vector<size_t> &widths, @@ -181,21 +184,23 @@ void handle_remaining_of_options(std::vector<Arguments<U>> &regular_test_vector, const std::vector<size_t> &dilation_ws, const std::vector<size_t> &batch_counts, const std::vector<size_t> &num_kernelss) { - for (auto &channels: channelss) { r_args.channels = channels; - for (auto &height: heights) { r_args.height = height; - for (auto &width: widths) { r_args.width = width; - for (auto &kernel_h: kernel_hs) { r_args.kernel_h = kernel_h; - for (auto &kernel_w: kernel_ws) { r_args.kernel_w = kernel_w; - for (auto &pad_h: pad_hs) { r_args.pad_h = pad_h; - for (auto &pad_w: pad_ws) { r_args.pad_w = pad_w; - for (auto &stride_h: stride_hs) { r_args.stride_h = stride_h; - for (auto &stride_w: stride_ws) { r_args.stride_w = stride_w; - for (auto &dilation_h: dilation_hs) { r_args.dilation_h = dilation_h; - for (auto &dilation_w: dilation_ws) { r_args.dilation_w = dilation_w; - for (auto &batch_count: batch_counts) { r_args.batch_count = batch_count; - for (auto &num_kernels: num_kernelss) { r_args.num_kernels = num_kernels; - C::SetSizes(r_args, tester.queue_); - regular_test_vector.push_back(r_args); + for (auto &kernel_mode: kernel_modes) { r_args.kernel_mode = kernel_mode; + for (auto &channels: channelss) { r_args.channels = channels; + for (auto &height: heights) { r_args.height = height; + for (auto &width: widths) { r_args.width = width; + for (auto &kernel_h: kernel_hs) { r_args.kernel_h = kernel_h; + for (auto &kernel_w: kernel_ws) { r_args.kernel_w = kernel_w; 
+ for (auto &pad_h: pad_hs) { r_args.pad_h = pad_h; + for (auto &pad_w: pad_ws) { r_args.pad_w = pad_w; + for (auto &stride_h: stride_hs) { r_args.stride_h = stride_h; + for (auto &stride_w: stride_ws) { r_args.stride_w = stride_w; + for (auto &dilation_h: dilation_hs) { r_args.dilation_h = dilation_h; + for (auto &dilation_w: dilation_ws) { r_args.dilation_w = dilation_w; + for (auto &batch_count: batch_counts) { r_args.batch_count = batch_count; + for (auto &num_kernels: num_kernelss) { r_args.num_kernels = num_kernels; + C::SetSizes(r_args, tester.queue_); + regular_test_vector.push_back(r_args); + } } } } @@ -284,6 +289,7 @@ size_t RunTests(int argc, char *argv[], const bool silent, const std::string &na auto imax_offsets = std::vector<size_t>{args.imax_offset}; auto alphas = std::vector<U>{args.alpha}; auto betas = std::vector<U>{args.beta}; + auto kernel_modes = std::vector<KernelMode>{args.kernel_mode}; auto channelss = std::vector<size_t>{args.channels}; auto heights = std::vector<size_t>{args.height}; auto widths = std::vector<size_t>{args.width}; @@ -340,6 +346,7 @@ size_t RunTests(int argc, char *argv[], const bool silent, const std::string &na if (option == kArgImaxOffset) { imax_offsets = tester.kOffsets; } if (option == kArgAlpha) { alphas = tester.kAlphaValues; } if (option == kArgBeta) { betas = tester.kBetaValues; } + if (option == kArgKernelMode) { kernel_modes = tester.kKernelModes; } if (option == kArgChannels) { channelss = tester.kChannelValues; } if (option == kArgHeight) { heights = tester.kMatrixDims; } if (option == kArgWidth) { widths = tester.kMatrixDims; } @@ -397,6 +404,7 @@ size_t RunTests(int argc, char *argv[], const bool silent, const std::string &na for (auto &beta: betas) { r_args.beta = beta; // Cannot have more for-loops because of MSVC's C1061 error handle_remaining_of_options<C>(regular_test_vector, r_args, tester, + kernel_modes, channelss, heights, widths, kernel_hs, kernel_ws, pad_hs, pad_ws, stride_hs, stride_ws, 
dilation_hs, dilation_ws, diff --git a/test/routines/levelx/xcol2im.hpp b/test/routines/levelx/xcol2im.hpp index 176fceae..c28727e7 100644 --- a/test/routines/levelx/xcol2im.hpp +++ b/test/routines/levelx/xcol2im.hpp @@ -31,7 +31,8 @@ public: // The list of arguments relevant for this routine static std::vector<std::string> GetOptions() { - return {kArgChannels, kArgHeight, kArgWidth, kArgKernelH, kArgKernelW, kArgPadH, kArgPadW, + return {kArgKernelMode, + kArgChannels, kArgHeight, kArgWidth, kArgKernelH, kArgKernelW, kArgPadH, kArgPadW, kArgStrideH, kArgStrideW, kArgDilationH, kArgDilationW, kArgAOffset, kArgBOffset}; } @@ -87,7 +88,8 @@ public: #ifdef OPENCL_API auto queue_plain = queue(); auto event = cl_event{}; - auto status = Col2im<T>(args.channels, args.height, args.width, + auto status = Col2im<T>(args.kernel_mode, + args.channels, args.height, args.width, args.kernel_h, args.kernel_w, args.pad_h, args.pad_w, args.stride_h, args.stride_w, @@ -97,7 +99,8 @@ public: &queue_plain, &event); if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } #elif CUDA_API - auto status = Col2im<T>(args.channels, args.height, args.width, + auto status = Col2im<T>(args.kernel_mode, + args.channels, args.height, args.width, args.kernel_h, args.kernel_w, args.pad_h, args.pad_w, args.stride_h, args.stride_w, @@ -167,7 +170,10 @@ StatusCode RunReference(const Arguments<T> &args, BuffersHost<T> &buffers_host) for (auto w_id = size_t{0}; w_id < col_w; ++w_id) { // image width // Reads the input value - const auto kernel_index = kw_id + args.kernel_w * kh_id; + const auto kernel_index + = (args.kernel_mode == KernelMode::kConvolution) + ? 
args.kernel_h * args.kernel_w - kw_id - args.kernel_w * kh_id - 1 + : kw_id + args.kernel_w * kh_id; const auto patch_index = w_id + col_w * h_id; const auto col_index = patch_index + kernel_index * col_w * col_h + c_id * col_w * col_h * args.kernel_h * args.kernel_w; diff --git a/test/routines/levelx/xconvgemm.hpp b/test/routines/levelx/xconvgemm.hpp index 7fa4e701..e67b8174 100644 --- a/test/routines/levelx/xconvgemm.hpp +++ b/test/routines/levelx/xconvgemm.hpp @@ -91,7 +91,8 @@ public: #ifdef OPENCL_API auto queue_plain = queue(); auto event = cl_event{}; - auto status = Convgemm<T>(args.channels, args.height, args.width, + auto status = Convgemm<T>(args.kernel_mode, + args.channels, args.height, args.width, args.kernel_h, args.kernel_w, args.pad_h, args.pad_w, args.stride_h, args.stride_w, @@ -103,7 +104,8 @@ public: &queue_plain, &event); if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } #elif CUDA_API - auto status = Convgemm<T>(args.channels, args.height, args.width, + auto status = Convgemm<T>(args.kernel_mode, + args.channels, args.height, args.width, args.kernel_h, args.kernel_w, args.pad_h, args.pad_w, args.stride_h, args.stride_w, @@ -189,10 +191,16 @@ StatusCode RunReference(const Arguments<T> &args, BuffersHost<T> &buffers_host) const auto input_value = buffers_host.a_mat[input_index + args.a_offset]; // Multiplies with the kernel tensor - const auto kernel_index = kw_id + args.kernel_w * ( - kh_id + args.kernel_h * ( - ci_id + args.channels * ( - co_id))); + const auto kernel_index + = (args.kernel_mode == KernelMode::kConvolution) + ? 
(args.kernel_w - kw_id - 1) + args.kernel_w * ( + (args.kernel_h - kh_id - 1) + args.kernel_h * ( + ci_id + args.channels * ( + co_id))) + : kw_id + args.kernel_w * ( + kh_id + args.kernel_h * ( + ci_id + args.channels * ( + co_id))); const auto kernel_value = buffers_host.b_mat[kernel_index + args.b_offset]; result += input_value * kernel_value; diff --git a/test/routines/levelx/xim2col.hpp b/test/routines/levelx/xim2col.hpp index acf7998b..2a3577c3 100644 --- a/test/routines/levelx/xim2col.hpp +++ b/test/routines/levelx/xim2col.hpp @@ -31,7 +31,8 @@ public: // The list of arguments relevant for this routine static std::vector<std::string> GetOptions() { - return {kArgChannels, kArgHeight, kArgWidth, kArgKernelH, kArgKernelW, kArgPadH, kArgPadW, + return {kArgKernelMode, + kArgChannels, kArgHeight, kArgWidth, kArgKernelH, kArgKernelW, kArgPadH, kArgPadW, kArgStrideH, kArgStrideW, kArgDilationH, kArgDilationW, kArgAOffset, kArgBOffset}; } @@ -87,7 +88,8 @@ public: #ifdef OPENCL_API auto queue_plain = queue(); auto event = cl_event{}; - auto status = Im2col<T>(args.channels, args.height, args.width, + auto status = Im2col<T>(args.kernel_mode, + args.channels, args.height, args.width, args.kernel_h, args.kernel_w, args.pad_h, args.pad_w, args.stride_h, args.stride_w, @@ -97,7 +99,8 @@ public: &queue_plain, &event); if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } #elif CUDA_API - auto status = Im2col<T>(args.channels, args.height, args.width, + auto status = Im2col<T>(args.kernel_mode, + args.channels, args.height, args.width, args.kernel_h, args.kernel_w, args.pad_h, args.pad_w, args.stride_h, args.stride_w, @@ -175,7 +178,10 @@ StatusCode RunReference(const Arguments<T> &args, BuffersHost<T> &buffers_host) } // Sets the output value - const auto kernel_index = kw_id + args.kernel_w * kh_id; + const auto kernel_index + = (args.kernel_mode == KernelMode::kConvolution) + ? 
args.kernel_h * args.kernel_w - kw_id - args.kernel_w * kh_id - 1 + : kw_id + args.kernel_w * kh_id; const auto patch_index = w_id + col_w * h_id; const auto col_index = patch_index + kernel_index * col_w * col_h + c_id * col_w * col_h * args.kernel_h * args.kernel_w; |