author      Koichi Akabe <vbkaisetsu@gmail.com>    2018-11-12 10:12:07 +0900
committer   Koichi Akabe <vbkaisetsu@gmail.com>    2018-11-12 10:12:07 +0900
commit      032e3b0cc00a15dd2af8b4fb82d261eb7b086e26 (patch)
tree        cdcf4d0fc342c9ff92ee7ab3f75b0cdeced46e96 /test
parent      90112618daa0d6b24ae3e53203a636d2e908dfba (diff)
Add kernel_mode option to im2col, col2im, and convgemm functions
Diffstat (limited to 'test')
-rw-r--r--   test/correctness/misc/override_parameters.cpp     1
-rw-r--r--   test/correctness/testblas.hpp                     38
-rw-r--r--   test/routines/levelx/xcol2im.hpp                  14
-rw-r--r--   test/routines/levelx/xconvgemm.hpp                20
-rw-r--r--   test/routines/levelx/xim2col.hpp                  14
5 files changed, 58 insertions, 29 deletions
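Note: this commit threads a new kernel_mode argument (KernelMode::kCrossCorrelation or KernelMode::kConvolution) through the im2col, col2im, and convgemm test routines and their host-side reference implementations. Cross-correlation reads the kernel in its original orientation; convolution reads it flipped along both spatial axes. A minimal standalone sketch of the flat index mapping used by the reference code in the hunks below, assuming the same row-major kernel layout; the helper function and its name are not part of the commit:

    // Sketch only: illustrates the index mapping used by the reference code in
    // this diff. KernelMode mirrors the enum referenced in the diff; the helper
    // function itself is hypothetical.
    #include <cstddef>

    enum class KernelMode { kCrossCorrelation, kConvolution };

    // Flat index of kernel position (kh, kw) inside a kernel_h x kernel_w window:
    // row-major for cross-correlation, flipped along both axes for convolution.
    std::size_t FlatKernelIndex(const KernelMode mode,
                                const std::size_t kernel_h, const std::size_t kernel_w,
                                const std::size_t kh, const std::size_t kw) {
      if (mode == KernelMode::kConvolution) {
        // Same as (kernel_h - 1 - kh) * kernel_w + (kernel_w - 1 - kw)
        return kernel_h * kernel_w - kw - kernel_w * kh - 1;
      }
      return kw + kernel_w * kh;
    }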
diff --git a/test/correctness/misc/override_parameters.cpp b/test/correctness/misc/override_parameters.cpp
index 54229c5e..7ed4faff 100644
--- a/test/correctness/misc/override_parameters.cpp
+++ b/test/correctness/misc/override_parameters.cpp
@@ -60,6 +60,7 @@ size_t RunOverrideTests(int argc, char *argv[], const bool silent, const std::st
args.layout = GetArgument(arguments, help, kArgLayout, Layout::kRowMajor);
args.a_transpose = GetArgument(arguments, help, kArgATransp, Transpose::kNo);
args.b_transpose = GetArgument(arguments, help, kArgBTransp, Transpose::kNo);
+ args.kernel_mode = GetArgument(arguments, help, kArgKernelMode, KernelMode::kCrossCorrelation);
args.alpha = GetArgument(arguments, help, kArgAlpha, GetScalar<T>());
args.beta = GetArgument(arguments, help, kArgBeta, GetScalar<T>());
diff --git a/test/correctness/testblas.hpp b/test/correctness/testblas.hpp
index 137df30f..b2dc6e7a 100644
--- a/test/correctness/testblas.hpp
+++ b/test/correctness/testblas.hpp
@@ -63,6 +63,7 @@ class TestBlas: public Tester<T,U> {
static const std::vector<size_t> kNumKernels;
static const std::vector<size_t> kStrideValues;
static const std::vector<size_t> kChannelValues;
+ static const std::vector<KernelMode> kKernelModes;
const std::vector<size_t> kOffsets;
const std::vector<U> kAlphaValues;
const std::vector<U> kBetaValues;
@@ -142,6 +143,7 @@ template <typename T, typename U> const std::vector<size_t> TestBlas<T,U>::kKern
template <typename T, typename U> const std::vector<size_t> TestBlas<T,U>::kNumKernels = { 1, 6 };
template <typename T, typename U> const std::vector<size_t> TestBlas<T,U>::kStrideValues = { 1, 3 };
template <typename T, typename U> const std::vector<size_t> TestBlas<T,U>::kChannelValues = { 1, 2 };
+template <typename T, typename U> const std::vector<KernelMode> TestBlas<T,U>::kKernelModes = { KernelMode::kCrossCorrelation, KernelMode::kConvolution };
// Test settings for the invalid tests
template <typename T, typename U> const std::vector<size_t> TestBlas<T,U>::kInvalidIncrements = { 0, 1 };
@@ -168,6 +170,7 @@ static StatusCode ReferenceNotAvailable(const Arguments<U> &, BufferType &, Queu
template <typename C, typename T, typename U>
void handle_remaining_of_options(std::vector<Arguments<U>> &regular_test_vector, Arguments<U> &r_args,
TestBlas<T,U> &tester,
+ const std::vector<KernelMode> &kernel_modes,
const std::vector<size_t> &channelss,
const std::vector<size_t> &heights,
const std::vector<size_t> &widths,
@@ -181,21 +184,23 @@ void handle_remaining_of_options(std::vector<Arguments<U>> &regular_test_vector,
const std::vector<size_t> &dilation_ws,
const std::vector<size_t> &batch_counts,
const std::vector<size_t> &num_kernelss) {
- for (auto &channels: channelss) { r_args.channels = channels;
- for (auto &height: heights) { r_args.height = height;
- for (auto &width: widths) { r_args.width = width;
- for (auto &kernel_h: kernel_hs) { r_args.kernel_h = kernel_h;
- for (auto &kernel_w: kernel_ws) { r_args.kernel_w = kernel_w;
- for (auto &pad_h: pad_hs) { r_args.pad_h = pad_h;
- for (auto &pad_w: pad_ws) { r_args.pad_w = pad_w;
- for (auto &stride_h: stride_hs) { r_args.stride_h = stride_h;
- for (auto &stride_w: stride_ws) { r_args.stride_w = stride_w;
- for (auto &dilation_h: dilation_hs) { r_args.dilation_h = dilation_h;
- for (auto &dilation_w: dilation_ws) { r_args.dilation_w = dilation_w;
- for (auto &batch_count: batch_counts) { r_args.batch_count = batch_count;
- for (auto &num_kernels: num_kernelss) { r_args.num_kernels = num_kernels;
- C::SetSizes(r_args, tester.queue_);
- regular_test_vector.push_back(r_args);
+ for (auto &kernel_mode: kernel_modes) { r_args.kernel_mode = kernel_mode;
+ for (auto &channels: channelss) { r_args.channels = channels;
+ for (auto &height: heights) { r_args.height = height;
+ for (auto &width: widths) { r_args.width = width;
+ for (auto &kernel_h: kernel_hs) { r_args.kernel_h = kernel_h;
+ for (auto &kernel_w: kernel_ws) { r_args.kernel_w = kernel_w;
+ for (auto &pad_h: pad_hs) { r_args.pad_h = pad_h;
+ for (auto &pad_w: pad_ws) { r_args.pad_w = pad_w;
+ for (auto &stride_h: stride_hs) { r_args.stride_h = stride_h;
+ for (auto &stride_w: stride_ws) { r_args.stride_w = stride_w;
+ for (auto &dilation_h: dilation_hs) { r_args.dilation_h = dilation_h;
+ for (auto &dilation_w: dilation_ws) { r_args.dilation_w = dilation_w;
+ for (auto &batch_count: batch_counts) { r_args.batch_count = batch_count;
+ for (auto &num_kernels: num_kernelss) { r_args.num_kernels = num_kernels;
+ C::SetSizes(r_args, tester.queue_);
+ regular_test_vector.push_back(r_args);
+ }
}
}
}
@@ -284,6 +289,7 @@ size_t RunTests(int argc, char *argv[], const bool silent, const std::string &na
auto imax_offsets = std::vector<size_t>{args.imax_offset};
auto alphas = std::vector<U>{args.alpha};
auto betas = std::vector<U>{args.beta};
+ auto kernel_modes = std::vector<KernelMode>{args.kernel_mode};
auto channelss = std::vector<size_t>{args.channels};
auto heights = std::vector<size_t>{args.height};
auto widths = std::vector<size_t>{args.width};
@@ -340,6 +346,7 @@ size_t RunTests(int argc, char *argv[], const bool silent, const std::string &na
if (option == kArgImaxOffset) { imax_offsets = tester.kOffsets; }
if (option == kArgAlpha) { alphas = tester.kAlphaValues; }
if (option == kArgBeta) { betas = tester.kBetaValues; }
+ if (option == kArgKernelMode) { kernel_modes = tester.kKernelModes; }
if (option == kArgChannels) { channelss = tester.kChannelValues; }
if (option == kArgHeight) { heights = tester.kMatrixDims; }
if (option == kArgWidth) { widths = tester.kMatrixDims; }
@@ -397,6 +404,7 @@ size_t RunTests(int argc, char *argv[], const bool silent, const std::string &na
for (auto &beta: betas) { r_args.beta = beta;
// Cannot have more for-loops because of MSVC's C1061 error
handle_remaining_of_options<C>(regular_test_vector, r_args, tester,
+ kernel_modes,
channelss, heights, widths, kernel_hs, kernel_ws,
pad_hs, pad_ws, stride_hs, stride_ws,
dilation_hs, dilation_ws,
diff --git a/test/routines/levelx/xcol2im.hpp b/test/routines/levelx/xcol2im.hpp
index 176fceae..c28727e7 100644
--- a/test/routines/levelx/xcol2im.hpp
+++ b/test/routines/levelx/xcol2im.hpp
@@ -31,7 +31,8 @@ public:
// The list of arguments relevant for this routine
static std::vector<std::string> GetOptions() {
- return {kArgChannels, kArgHeight, kArgWidth, kArgKernelH, kArgKernelW, kArgPadH, kArgPadW,
+ return {kArgKernelMode,
+ kArgChannels, kArgHeight, kArgWidth, kArgKernelH, kArgKernelW, kArgPadH, kArgPadW,
kArgStrideH, kArgStrideW, kArgDilationH, kArgDilationW,
kArgAOffset, kArgBOffset};
}
@@ -87,7 +88,8 @@ public:
#ifdef OPENCL_API
auto queue_plain = queue();
auto event = cl_event{};
- auto status = Col2im<T>(args.channels, args.height, args.width,
+ auto status = Col2im<T>(args.kernel_mode,
+ args.channels, args.height, args.width,
args.kernel_h, args.kernel_w,
args.pad_h, args.pad_w,
args.stride_h, args.stride_w,
@@ -97,7 +99,8 @@ public:
&queue_plain, &event);
if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); }
#elif CUDA_API
- auto status = Col2im<T>(args.channels, args.height, args.width,
+ auto status = Col2im<T>(args.kernel_mode,
+ args.channels, args.height, args.width,
args.kernel_h, args.kernel_w,
args.pad_h, args.pad_w,
args.stride_h, args.stride_w,
@@ -167,7 +170,10 @@ StatusCode RunReference(const Arguments<T> &args, BuffersHost<T> &buffers_host)
for (auto w_id = size_t{0}; w_id < col_w; ++w_id) { // image width
// Reads the input value
- const auto kernel_index = kw_id + args.kernel_w * kh_id;
+ const auto kernel_index
+ = (args.kernel_mode == KernelMode::kConvolution)
+ ? args.kernel_h * args.kernel_w - kw_id - args.kernel_w * kh_id - 1
+ : kw_id + args.kernel_w * kh_id;
const auto patch_index = w_id + col_w * h_id;
const auto col_index = patch_index + kernel_index * col_w * col_h +
c_id * col_w * col_h * args.kernel_h * args.kernel_w;
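The col2im reference (and the im2col reference further down) applies the convolution-mode flip as a single flattened expression. A quick standalone check, not part of the commit, that this expression equals flipping each spatial axis separately:

    // Verification sketch only: the flattened expression from the col2im/im2col
    // references equals a per-axis flip for every kernel position.
    #include <cassert>
    #include <cstddef>

    int main() {
      const std::size_t kernel_h = 3, kernel_w = 5;
      for (std::size_t kh = 0; kh < kernel_h; ++kh) {
        for (std::size_t kw = 0; kw < kernel_w; ++kw) {
          const auto flat_flip = kernel_h * kernel_w - kw - kernel_w * kh - 1;
          const auto axis_flip = (kernel_w - kw - 1) + kernel_w * (kernel_h - kh - 1);
          assert(flat_flip == axis_flip);  // both describe a flip along height and width
        }
      }
      return 0;
    }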
diff --git a/test/routines/levelx/xconvgemm.hpp b/test/routines/levelx/xconvgemm.hpp
index 7fa4e701..e67b8174 100644
--- a/test/routines/levelx/xconvgemm.hpp
+++ b/test/routines/levelx/xconvgemm.hpp
@@ -91,7 +91,8 @@ public:
#ifdef OPENCL_API
auto queue_plain = queue();
auto event = cl_event{};
- auto status = Convgemm<T>(args.channels, args.height, args.width,
+ auto status = Convgemm<T>(args.kernel_mode,
+ args.channels, args.height, args.width,
args.kernel_h, args.kernel_w,
args.pad_h, args.pad_w,
args.stride_h, args.stride_w,
@@ -103,7 +104,8 @@ public:
&queue_plain, &event);
if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); }
#elif CUDA_API
- auto status = Convgemm<T>(args.channels, args.height, args.width,
+ auto status = Convgemm<T>(args.kernel_mode,
+ args.channels, args.height, args.width,
args.kernel_h, args.kernel_w,
args.pad_h, args.pad_w,
args.stride_h, args.stride_w,
@@ -189,10 +191,16 @@ StatusCode RunReference(const Arguments<T> &args, BuffersHost<T> &buffers_host)
const auto input_value = buffers_host.a_mat[input_index + args.a_offset];
// Multiplies with the kernel tensor
- const auto kernel_index = kw_id + args.kernel_w * (
- kh_id + args.kernel_h * (
- ci_id + args.channels * (
- co_id)));
+ const auto kernel_index
+ = (args.kernel_mode == KernelMode::kConvolution)
+ ? (args.kernel_w - kw_id - 1) + args.kernel_w * (
+ (args.kernel_h - kh_id - 1) + args.kernel_h * (
+ ci_id + args.channels * (
+ co_id)))
+ : kw_id + args.kernel_w * (
+ kh_id + args.kernel_h * (
+ ci_id + args.channels * (
+ co_id)));
const auto kernel_value = buffers_host.b_mat[kernel_index + args.b_offset];
result += input_value * kernel_value;
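The convgemm reference indexes the kernel as a 4-D tensor (output kernel, input channel, kernel height, kernel width, flattened row-major), so the convolution branch above flips only the two spatial coordinates and leaves the channel and output-kernel indices untouched. A compact restatement of the two branches, with a hypothetical helper name, assuming the same layout:

    // Sketch only: restates the kernel_index computation from the convgemm
    // reference above; the function name is not part of the commit.
    #include <cstddef>

    std::size_t ConvgemmKernelIndex(const bool convolution,
                                    const std::size_t kernel_h, const std::size_t kernel_w,
                                    const std::size_t channels,
                                    const std::size_t kh, const std::size_t kw,
                                    const std::size_t ci, const std::size_t co) {
      const auto h = convolution ? kernel_h - kh - 1 : kh;  // flip height for convolution
      const auto w = convolution ? kernel_w - kw - 1 : kw;  // flip width for convolution
      return w + kernel_w * (h + kernel_h * (ci + channels * co));
    }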
diff --git a/test/routines/levelx/xim2col.hpp b/test/routines/levelx/xim2col.hpp
index acf7998b..2a3577c3 100644
--- a/test/routines/levelx/xim2col.hpp
+++ b/test/routines/levelx/xim2col.hpp
@@ -31,7 +31,8 @@ public:
// The list of arguments relevant for this routine
static std::vector<std::string> GetOptions() {
- return {kArgChannels, kArgHeight, kArgWidth, kArgKernelH, kArgKernelW, kArgPadH, kArgPadW,
+ return {kArgKernelMode,
+ kArgChannels, kArgHeight, kArgWidth, kArgKernelH, kArgKernelW, kArgPadH, kArgPadW,
kArgStrideH, kArgStrideW, kArgDilationH, kArgDilationW,
kArgAOffset, kArgBOffset};
}
@@ -87,7 +88,8 @@ public:
#ifdef OPENCL_API
auto queue_plain = queue();
auto event = cl_event{};
- auto status = Im2col<T>(args.channels, args.height, args.width,
+ auto status = Im2col<T>(args.kernel_mode,
+ args.channels, args.height, args.width,
args.kernel_h, args.kernel_w,
args.pad_h, args.pad_w,
args.stride_h, args.stride_w,
@@ -97,7 +99,8 @@ public:
&queue_plain, &event);
if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); }
#elif CUDA_API
- auto status = Im2col<T>(args.channels, args.height, args.width,
+ auto status = Im2col<T>(args.kernel_mode,
+ args.channels, args.height, args.width,
args.kernel_h, args.kernel_w,
args.pad_h, args.pad_w,
args.stride_h, args.stride_w,
@@ -175,7 +178,10 @@ StatusCode RunReference(const Arguments<T> &args, BuffersHost<T> &buffers_host)
}
// Sets the output value
- const auto kernel_index = kw_id + args.kernel_w * kh_id;
+ const auto kernel_index
+ = (args.kernel_mode == KernelMode::kConvolution)
+ ? args.kernel_h * args.kernel_w - kw_id - args.kernel_w * kh_id - 1
+ : kw_id + args.kernel_w * kh_id;
const auto patch_index = w_id + col_w * h_id;
const auto col_index = patch_index + kernel_index * col_w * col_h +
c_id * col_w * col_h * args.kernel_h * args.kernel_w;