diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2018-05-09 17:42:59 +0200 |
---|---|---|
committer | Cedric Nugteren <web@cedricnugteren.nl> | 2018-05-09 17:42:59 +0200 |
commit | cc95d4fa03f4f7f4ed719af46b64029ba19b8142 (patch) | |
tree | f8b6026e1d163fa629dae7225989aa6230986009 /src/routines/levelx/xconvgemm.cpp | |
parent | 52e6195628c163e9bf2c78fa72e4e2f43c0764da (diff) |
Implemented convolution as im2col + GEMM
Diffstat (limited to 'src/routines/levelx/xconvgemm.cpp')
-rw-r--r-- | src/routines/levelx/xconvgemm.cpp | 42 |
1 file changed, 41 insertions, 1 deletion
diff --git a/src/routines/levelx/xconvgemm.cpp b/src/routines/levelx/xconvgemm.cpp index 2676dbda..0e59b5be 100644 --- a/src/routines/levelx/xconvgemm.cpp +++ b/src/routines/levelx/xconvgemm.cpp @@ -12,6 +12,8 @@ // ================================================================================================= #include "routines/levelx/xconvgemm.hpp" +#include "routines/levelx/xim2col.hpp" +#include "routines/level3/xgemm.hpp" #include <string> #include <vector> @@ -52,7 +54,45 @@ void Xconvgemm<T>::DoConvgemm(const size_t channels, const size_t height, const const auto padding_w = dilation_w * (kernel_w - 1) + 1; const auto output_w = (size_w >= padding_w) ? (size_w - padding_w) / stride_w + 1 : 1; - throw BLASError(StatusCode::kNotImplemented); + // Temporary col matrix + const auto patch_size = kernel_h * kernel_w * channels; + const auto num_patches = output_h * output_w; + const auto col_size = patch_size * num_patches; + auto col_buffer = Buffer<T>(context_, col_size); + + // Approach: im2col + GEMM + // result = GEMM(im2col(image), kernel) + for (auto batch_id = size_t{0}; batch_id < batch_count; ++batch_id) { + + // im2col + const auto im_batch_offset = batch_id * channels * height * width + im_offset; + auto im2col_event = Event(); + auto im2col = Xim2col<T>(queue_, im2col_event.pointer()); + im2col.DoIm2col(channels, height, width, kernel_h, kernel_w, + pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, + im_buffer, im_batch_offset, + col_buffer, 0); + im2col_event.WaitForCompletion(); + + // GEMM: C (result) = alpha (1) * A (col) * B (kernel) + beta (0) * C (result) + const auto m = num_patches; + const auto n = num_kernels; + const auto k = patch_size; + const auto col_gemm_offset = size_t{0}; // A + const auto kernel_gemm_offset = kernel_offset; // B + const auto result_gemm_offset = batch_id * num_kernels * output_h * output_w + result_offset; // C + const auto col_ld = m; + const auto kernel_ld = k; + const auto result_ld = m; + auto 
gemm_event = Event(); + auto gemm = Xgemm<T>(queue_, gemm_event.pointer()); + gemm.DoGemm(Layout::kColMajor, Transpose::kNo, Transpose::kNo, + m, n, k, ConstantOne<T>(), + col_buffer, col_gemm_offset, col_ld, + kernel_buffer, kernel_gemm_offset, kernel_ld, ConstantZero<T>(), + result_buffer, result_gemm_offset, result_ld); + gemm_event.WaitForCompletion(); + } } // ================================================================================================= |