summary refs log tree commit diff
path: root/src/clblast.cc
diff options
context:
space:
mode:
author CNugteren <web@cedricnugteren.nl> 2015-07-10 20:59:20 +0200
committer CNugteren <web@cedricnugteren.nl> 2015-07-10 20:59:20 +0200
commit b02876d6e9f711369474219576e7bcbebdb10e1c (patch)
tree 9dccab7ef616aad95b5142a8d4b886660d49c0c2 /src/clblast.cc
parent 919bba3eaf0feaa83e787aa500d6f0d5169b02b5 (diff)
Added the HER2K routine, tester, and client
Diffstat (limited to 'src/clblast.cc')
-rw-r--r-- src/clblast.cc 59
1 files changed, 54 insertions, 5 deletions
diff --git a/src/clblast.cc b/src/clblast.cc
index 638bc944..00a90707 100644
--- a/src/clblast.cc
+++ b/src/clblast.cc
@@ -29,6 +29,7 @@
#include "internal/routines/xsyrk.h"
#include "internal/routines/xherk.h"
#include "internal/routines/xsyr2k.h"
+#include "internal/routines/xher2k.h"
#include "internal/routines/xtrmm.h"
namespace clblast {
@@ -350,11 +351,11 @@ template StatusCode Herk<double>(const Layout, const Triangle, const Transpose,
// SYR2K
template <typename T>
StatusCode Syr2k(const Layout layout, const Triangle triangle, const Transpose ab_transpose,
- const size_t n, const size_t k, const T alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const T beta,
- cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
- cl_command_queue* queue, cl_event* event) {
+ const size_t n, const size_t k, const T alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const T beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue, cl_event* event) {
auto queue_cpp = CommandQueue(*queue);
auto event_cpp = Event(*event);
auto routine = Xsyr2k<T>(queue_cpp, event_cpp);
@@ -407,6 +408,54 @@ template StatusCode Syr2k<double2>(const Layout, const Triangle, const Transpose
// =================================================================================================
+// HER2K
+template <typename T, typename U>  // Her2k entry point: sets up an Xher2k<T,U> routine and runs DoHer2k; alpha has type T, beta has type U
+StatusCode Her2k(const Layout layout, const Triangle triangle, const Transpose ab_transpose,
+ const size_t n, const size_t k, const T alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const U beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue, cl_event* event) {  // both pointers are dereferenced below without a null check
+ auto queue_cpp = CommandQueue(*queue);  // construct CommandQueue/Event objects from the dereferenced handles
+ auto event_cpp = Event(*event);
+ auto routine = Xher2k<T,U>(queue_cpp, event_cpp);
+
+ // Loads the kernel source-code as an include (C++11 raw string literal). NOTE(review): each .opencl file presumably supplies the literal and the closing ';' - confirm
+ std::string common_source1 =
+ #include "kernels/copy.opencl"
+ std::string common_source2 =
+ #include "kernels/pad.opencl"
+ std::string common_source3 =
+ #include "kernels/transpose.opencl"
+ std::string common_source4 =
+ #include "kernels/padtranspose.opencl"
+ std::string kernel_source =  // the main kernel text comes from xgemm.opencl; no HER2K-specific kernel file is included here
+ #include "kernels/xgemm.opencl"
+ auto status = routine.SetUp(common_source1 + common_source2 + common_source3 + common_source4 +  // the five sources are concatenated into one string for SetUp
+ kernel_source);
+ if (status != StatusCode::kSuccess) { return status; }  // a failed SetUp is returned to the caller unchanged
+
+ // Runs the routine and returns its status code directly
+ return routine.DoHer2k(layout, triangle, ab_transpose, n, k, alpha,
+ Buffer(a_buffer), a_offset, a_ld,  // each cl_mem handle is passed wrapped in a Buffer
+ Buffer(b_buffer), b_offset, b_ld, beta,
+ Buffer(c_buffer), c_offset, c_ld);
+}
+template StatusCode Her2k<float2,float>(const Layout, const Triangle, const Transpose,
+ const size_t, const size_t, const float2,
+ const cl_mem, const size_t, const size_t,
+ const cl_mem, const size_t, const size_t, const float,
+ cl_mem, const size_t, const size_t,
+ cl_command_queue*, cl_event*);  // explicit instantiation: T = float2 (alpha), U = float (beta)
+template StatusCode Her2k<double2,double>(const Layout, const Triangle, const Transpose,
+ const size_t, const size_t, const double2,
+ const cl_mem, const size_t, const size_t,
+ const cl_mem, const size_t, const size_t, const double,
+ cl_mem, const size_t, const size_t,
+ cl_command_queue*, cl_event*);  // explicit instantiation: T = double2 (alpha), U = double (beta)
+
+// =================================================================================================
+
// TRMM
template <typename T>
StatusCode Trmm(const Layout layout, const Side side, const Triangle triangle,