summary refs log tree commit diff
path: root/src/clblast.cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/clblast.cc')
-rw-r--r-- src/clblast.cc 49
1 files changed, 49 insertions, 0 deletions
diff --git a/src/clblast.cc b/src/clblast.cc
index 66202adb..23046b01 100644
--- a/src/clblast.cc
+++ b/src/clblast.cc
@@ -26,6 +26,7 @@
// BLAS level-3 includes
#include "internal/routines/xgemm.h"
#include "internal/routines/xsymm.h"
+#include "internal/routines/xhemm.h"
#include "internal/routines/xsyrk.h"
#include "internal/routines/xherk.h"
#include "internal/routines/xsyr2k.h"
@@ -250,6 +251,54 @@ template StatusCode Symm<double2>(const Layout, const Side, const Triangle,
// =================================================================================================
+// HEMM
+template <typename T>
+StatusCode Hemm(const Layout layout, const Side side, const Triangle triangle,
+ const size_t m, const size_t n, const T alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const T beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue, cl_event* event) {
+ auto queue_cpp = CommandQueue(*queue); // NOTE(review): queue/event dereferenced unchecked
+ auto event_cpp = Event(*event);
+ auto routine = Xhemm<T>(queue_cpp, event_cpp);
+
+ // Loads the kernel sources as includes: each file supplies a C++11 raw string literal and the ';'
+ std::string common_source1 =
+ #include "kernels/copy.opencl"
+ std::string common_source2 =
+ #include "kernels/pad.opencl"
+ std::string common_source3 =
+ #include "kernels/transpose.opencl"
+ std::string common_source4 =
+ #include "kernels/padtranspose.opencl"
+ std::string kernel_source = // the Xhemm routine is set up with the xgemm kernel source
+ #include "kernels/xgemm.opencl"
+ auto status = routine.SetUp(common_source1 + common_source2 + common_source3 + common_source4 +
+ kernel_source);
+ if (status != StatusCode::kSuccess) { return status; }
+
+ // Runs the routine, wrapping the raw cl_mem handles in Buffer objects; returns DoHemm's status
+ return routine.DoHemm(layout, side, triangle, m, n, alpha,
+ Buffer(a_buffer), a_offset, a_ld,
+ Buffer(b_buffer), b_offset, b_ld, beta,
+ Buffer(c_buffer), c_offset, c_ld);
+}
+template StatusCode Hemm<float2>(const Layout, const Side, const Triangle,
+ const size_t, const size_t, const float2,
+ const cl_mem, const size_t, const size_t,
+ const cl_mem, const size_t, const size_t, const float2,
+ cl_mem, const size_t, const size_t,
+ cl_command_queue*, cl_event*);
+template StatusCode Hemm<double2>(const Layout, const Side, const Triangle,
+ const size_t, const size_t, const double2,
+ const cl_mem, const size_t, const size_t,
+ const cl_mem, const size_t, const size_t, const double2,
+ cl_mem, const size_t, const size_t,
+ cl_command_queue*, cl_event*);
+
+// =================================================================================================
+
// SYRK
template <typename T>
StatusCode Syrk(const Layout layout, const Triangle triangle, const Transpose a_transpose,