From 9fb2c61b256ccf66b6a7b6f605008125288d60cf Mon Sep 17 00:00:00 2001
From: Cedric Nugteren
Date: Sun, 7 Jan 2018 14:27:15 +0100
Subject: Added API and tests for new GemmStridedBatched routine

---
 src/routines/levelx/xgemmstridedbatched.hpp | 66 +++++++++++++++++++++++++++++
 1 file changed, 66 insertions(+)
 create mode 100644 src/routines/levelx/xgemmstridedbatched.hpp

(limited to 'src/routines/levelx/xgemmstridedbatched.hpp')

diff --git a/src/routines/levelx/xgemmstridedbatched.hpp b/src/routines/levelx/xgemmstridedbatched.hpp
new file mode 100644
index 00000000..0dbbcb10
--- /dev/null
+++ b/src/routines/levelx/xgemmstridedbatched.hpp
@@ -0,0 +1,66 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+//   Cedric Nugteren
+//
+// This file implements the XgemmStridedBatched routine. This is a non-blas batched version of GEMM.
+//
+// =================================================================================================
+
+#ifndef CLBLAST_ROUTINES_XGEMMSTRIDEDBATCHED_H_
+#define CLBLAST_ROUTINES_XGEMMSTRIDEDBATCHED_H_
+
+#include <string>  // std::string, used by the constructor's 'name' argument
+
+#include "routine.hpp"
+
+namespace clblast {
+// =================================================================================================
+
+// Strided-batched GEMM routine class, templated on precision T (see comment at top of file)
+template <typename T>
+class XgemmStridedBatched: public Routine {
+public:
+
+  // Constructor: takes a queue and an event pointer; 'name' defaults to "GEMMSTRIDEDBATCHED"
+  XgemmStridedBatched(Queue &queue, EventPointer event, const std::string &name = "GEMMSTRIDEDBATCHED");
+
+  // Templated-precision implementation: A, B and C each take an offset, ld and stride argument
+  void DoGemmStridedBatched(const Layout layout, const Transpose a_transpose, const Transpose b_transpose,
+                            const size_t m, const size_t n, const size_t k, const T alpha,
+                            const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld, const size_t a_stride,
+                            const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld, const size_t b_stride, const T beta,
+                            const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld, const size_t c_stride,
+                            const size_t batch_count);
+
+  // Indirect version of strided batched GEMM (with pre and post-processing kernels)
+  void BatchedGemmIndirect(const size_t m, const size_t n, const size_t k, const T alpha,
+                           const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld, const size_t a_stride,
+                           const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld, const size_t b_stride, const T beta,
+                           const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld, const size_t c_stride,
+                           const bool a_do_transpose, const bool b_do_transpose, const bool c_do_transpose,
+                           const bool a_conjugate, const bool b_conjugate,
+                           const size_t a_one, const size_t a_two,
+                           const size_t b_one, const size_t b_two,
+                           const size_t c_one, const size_t c_two,
+                           const size_t batch_count);
+
+  // Direct version of strided batched GEMM (no pre and post-processing kernels)
+  void BatchedGemmDirect(const size_t m, const size_t n, const size_t k, const T alpha,
+                         const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld, const size_t a_stride,
+                         const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld, const size_t b_stride, const T beta,
+                         const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld, const size_t c_stride,
+                         const bool a_do_transpose, const bool b_do_transpose, const bool c_do_transpose,
+                         const bool a_conjugate, const bool b_conjugate,
+                         const size_t batch_count);
+};
+
+// =================================================================================================
+} // namespace clblast
+
+// CLBLAST_ROUTINES_XGEMMSTRIDEDBATCHED_H_
+#endif
-- 
cgit v1.2.3