diff options
-rw-r--r-- | src/routines/levelx/xgemmbatched.cpp        |  8 |
-rw-r--r-- | src/routines/levelx/xgemmstridedbatched.cpp |  8 |
-rw-r--r-- | src/utilities/buffer_test.hpp               | 58 |
3 files changed, 64 insertions(+), 10 deletions(-)
diff --git a/src/routines/levelx/xgemmbatched.cpp b/src/routines/levelx/xgemmbatched.cpp
index b12b8734..0d7ae5ab 100644
--- a/src/routines/levelx/xgemmbatched.cpp
+++ b/src/routines/levelx/xgemmbatched.cpp
@@ -79,11 +79,9 @@ void XgemmBatched<T>::DoGemmBatched(const Layout layout, const Transpose a_trans
                                               gemm_kernel_id);
 
   // Tests the matrices for validity
-  for (auto batch = size_t{0}; batch < batch_count; ++batch) {
-    TestMatrixA(a_one, a_two, a_buffer, a_offsets[batch], a_ld, false); // don't test for invalid LD
-    TestMatrixB(b_one, b_two, b_buffer, b_offsets[batch], b_ld, false); // don't test for invalid LD
-    TestMatrixC(c_one, c_two, c_buffer, c_offsets[batch], c_ld);
-  }
+  TestBatchedMatrixA(a_one, a_two, a_buffer, a_offsets, a_ld, false); // don't test for invalid LD
+  TestBatchedMatrixB(b_one, b_two, b_buffer, b_offsets, b_ld, false); // don't test for invalid LD
+  TestBatchedMatrixC(c_one, c_two, c_buffer, c_offsets, c_ld);
 
   // Upload the scalar arguments to the device
   auto alphas_device = Buffer<T>(context_, BufferAccess::kReadWrite, batch_count);
diff --git a/src/routines/levelx/xgemmstridedbatched.cpp b/src/routines/levelx/xgemmstridedbatched.cpp
index 5d8945ce..8fb83141 100644
--- a/src/routines/levelx/xgemmstridedbatched.cpp
+++ b/src/routines/levelx/xgemmstridedbatched.cpp
@@ -78,11 +78,9 @@ void XgemmStridedBatched<T>::DoGemmStridedBatched(const Layout layout, const Tra
                                               gemm_kernel_id);
 
   // Tests the matrices for validity
-  for (auto batch = size_t{0}; batch < batch_count; ++batch) {
-    TestMatrixA(a_one, a_two, a_buffer, a_offset + a_stride * batch, a_ld);
-    TestMatrixB(b_one, b_two, b_buffer, b_offset + b_stride * batch, b_ld);
-    TestMatrixC(c_one, c_two, c_buffer, c_offset + c_stride * batch, c_ld);
-  }
+  TestStridedBatchedMatrixA(a_one, a_two, a_buffer, a_offset, a_stride, batch_count, a_ld);
+  TestStridedBatchedMatrixB(b_one, b_two, b_buffer, b_offset, b_stride, batch_count, b_ld);
+  TestStridedBatchedMatrixC(c_one, c_two, c_buffer, c_offset, c_stride, batch_count, c_ld);
 
   // Selects which version of the batched GEMM to run
   if (do_gemm_direct) { // single generic kernel
diff --git a/src/utilities/buffer_test.hpp b/src/utilities/buffer_test.hpp
index fd071434..4a2a2c95 100644
--- a/src/utilities/buffer_test.hpp
+++ b/src/utilities/buffer_test.hpp
@@ -15,6 +15,9 @@
 #ifndef CLBLAST_BUFFER_TEST_H_
 #define CLBLAST_BUFFER_TEST_H_
 
+#include <algorithm>
+#include <vector>
+
 #include "utilities/utilities.hpp"
 
 namespace clblast {
@@ -105,6 +108,61 @@ void TestVectorIndex(const size_t n, const Buffer<T> &buffer, const size_t offse
 }
 
 // =================================================================================================
+
+// Tests matrix 'A' for validity in a batched setting
+template <typename T>
+void TestBatchedMatrixA(const size_t one, const size_t two, const Buffer<T>& buffer,
+                        const std::vector<size_t> &offsets, const size_t ld, const bool test_lead_dim = true) {
+  const auto max_offset = *std::max_element(offsets.begin(), offsets.end());
+  TestMatrixA(one, two, buffer, max_offset, ld, test_lead_dim);
+}
+
+// Tests matrix 'B' for validity in a batched setting
+template <typename T>
+void TestBatchedMatrixB(const size_t one, const size_t two, const Buffer<T>& buffer,
+                        const std::vector<size_t>& offsets, const size_t ld, const bool test_lead_dim = true) {
+  const auto max_offset = *std::max_element(offsets.begin(), offsets.end());
+  TestMatrixB(one, two, buffer, max_offset, ld, test_lead_dim);
+}
+
+// Tests matrix 'C' for validity in a batched setting
+template <typename T>
+void TestBatchedMatrixC(const size_t one, const size_t two, const Buffer<T>& buffer,
+                        const std::vector<size_t>& offsets, const size_t ld) {
+  const auto max_offset = *std::max_element(offsets.begin(), offsets.end());
+  TestMatrixC(one, two, buffer, max_offset, ld);
+}
+
+// =================================================================================================
+
+// Tests matrix 'A' for validity in a strided batched setting
+template <typename T>
+void TestStridedBatchedMatrixA(const size_t one, const size_t two, const Buffer<T>& buffer,
+                               const size_t offset, const size_t stride, const size_t batch_count,
+                               const size_t ld, const bool test_lead_dim = true) {
+  const auto last_batch_offset = (batch_count - 1) * stride;
+  TestMatrixA(one, two, buffer, offset + last_batch_offset, ld, test_lead_dim);
+}
+
+// Tests matrix 'B' for validity in a strided batched setting
+template <typename T>
+void TestStridedBatchedMatrixB(const size_t one, const size_t two, const Buffer<T>& buffer,
+                               const size_t offset, const size_t stride, const size_t batch_count,
+                               const size_t ld, const bool test_lead_dim = true) {
+  const auto last_batch_offset = (batch_count - 1) * stride;
+  TestMatrixB(one, two, buffer, offset + last_batch_offset, ld, test_lead_dim);
+}
+
+// Tests matrix 'C' for validity in a strided batched setting
+template <typename T>
+void TestStridedBatchedMatrixC(const size_t one, const size_t two, const Buffer<T>& buffer,
+                               const size_t offset, const size_t stride, const size_t batch_count,
+                               const size_t ld) {
+  const auto last_batch_offset = (batch_count - 1) * stride;
+  TestMatrixC(one, two, buffer, offset + last_batch_offset, ld);
+}
+
+// =================================================================================================
 }  // namespace clblast
 
 #endif  // CLBLAST_BUFFER_TEST_H_