summary | refs | log | tree | commit | diff
path: root/src/routines/levelx
diff options
context:
space:
mode:
author: Cedric Nugteren <web@cedricnugteren.nl>, 2018-05-09 20:38:39 +0200
committer: Cedric Nugteren <web@cedricnugteren.nl>, 2018-05-09 20:38:39 +0200
commit: 4e6d30088d7c73f13b0ad6db5794b232add2b735 (patch)
tree: 105fbb957d41e0206fc3b760ec2ec115515b1779 /src/routines/levelx
parent: b60828036122c5fe6e0305963ddc1ada6a2effff (diff)
Changed temporary convgemm implementation to use batched-strided GEMM
Diffstat (limited to 'src/routines/levelx')
-rw-r--r-- src/routines/levelx/xconvgemm.cpp | 54
1 file changed, 30 insertions, 24 deletions
diff --git a/src/routines/levelx/xconvgemm.cpp b/src/routines/levelx/xconvgemm.cpp
index 0e59b5be..d3b198a2 100644
--- a/src/routines/levelx/xconvgemm.cpp
+++ b/src/routines/levelx/xconvgemm.cpp
@@ -13,7 +13,7 @@
#include "routines/levelx/xconvgemm.hpp"
#include "routines/levelx/xim2col.hpp"
-#include "routines/level3/xgemm.hpp"
+#include "routines/levelx/xgemmstridedbatched.hpp"
#include <string>
#include <vector>
@@ -54,45 +54,51 @@ void Xconvgemm<T>::DoConvgemm(const size_t channels, const size_t height, const
const auto padding_w = dilation_w * (kernel_w - 1) + 1;
const auto output_w = (size_w >= padding_w) ? (size_w - padding_w) / stride_w + 1 : 1;
- // Temporary col matrix
+ // Sets other useful variables
const auto patch_size = kernel_h * kernel_w * channels;
const auto num_patches = output_h * output_w;
- const auto col_size = patch_size * num_patches;
- auto col_buffer = Buffer<T>(context_, col_size);
// Approach: im2col + GEMM
// result = GEMM(im2col(image), kernel)
+
+ // Temporary col matrix
+ const auto col_size = patch_size * num_patches * batch_count;
+ auto col_buffer = Buffer<T>(context_, col_size);
+
+ // Loops over each batch
for (auto batch_id = size_t{0}; batch_id < batch_count; ++batch_id) {
// im2col
const auto im_batch_offset = batch_id * channels * height * width + im_offset;
+ const auto col_batch_offset = batch_id * patch_size * num_patches;
auto im2col_event = Event();
auto im2col = Xim2col<T>(queue_, im2col_event.pointer());
im2col.DoIm2col(channels, height, width, kernel_h, kernel_w,
pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w,
im_buffer, im_batch_offset,
- col_buffer, 0);
+ col_buffer, col_batch_offset);
im2col_event.WaitForCompletion();
-
- // GEMM: C (result) = alpha (1) * A (col) * B (kernel) + beta (0) * C (result)
- const auto m = num_patches;
- const auto n = num_kernels;
- const auto k = patch_size;
- const auto col_gemm_offset = size_t{0}; // A
- const auto kernel_gemm_offset = kernel_offset; // B
- const auto result_gemm_offset = batch_id * num_kernels * output_h * output_w + result_offset; // C
- const auto col_ld = m;
- const auto kernel_ld = k;
- const auto result_ld = m;
- auto gemm_event = Event();
- auto gemm = Xgemm<T>(queue_, gemm_event.pointer());
- gemm.DoGemm(Layout::kColMajor, Transpose::kNo, Transpose::kNo,
- m, n, k, ConstantOne<T>(),
- col_buffer, col_gemm_offset, col_ld,
- kernel_buffer, kernel_gemm_offset, kernel_ld, ConstantZero<T>(),
- result_buffer, result_gemm_offset, result_ld);
- gemm_event.WaitForCompletion();
}
+
+ // GEMM: C (result) = alpha (1) * A (col) * B (kernel) + beta (0) * C (result)
+ const auto m = num_patches;
+ const auto n = num_kernels;
+ const auto k = patch_size;
+ const auto col_ld = m;
+ const auto kernel_ld = k;
+ const auto result_ld = m;
+ const auto col_stride = patch_size * num_patches;
+ const auto kernel_stride = size_t{0}; // applies the same kernel to all
+ const auto result_stride = num_kernels * output_h * output_w;
+ auto gemm_event = Event();
+ auto gemm = XgemmStridedBatched<T>(queue_, gemm_event.pointer());
+ gemm.DoGemmStridedBatched(Layout::kColMajor, Transpose::kNo, Transpose::kNo,
+ m, n, k, ConstantOne<T>(),
+ col_buffer, 0, col_ld, col_stride,
+ kernel_buffer, kernel_offset, kernel_ld, kernel_stride, ConstantZero<T>(),
+ result_buffer, result_offset, result_ld, result_stride,
+ batch_count);
+ gemm_event.WaitForCompletion();
}
// =================================================================================================