summary | refs | log | tree | commit | diff
path: root/src/clblast.cc
diff options
context:
space:
mode:
author: CNugteren <web@cedricnugteren.nl> 2015-07-10 07:19:59 +0200
committer: CNugteren <web@cedricnugteren.nl> 2015-07-10 07:19:59 +0200
commit: 919bba3eaf0feaa83e787aa500d6f0d5169b02b5 (patch)
tree: 207f61a5336a207306c523c031d8bc302c02bca1 /src/clblast.cc
parent: 2fe3fe15801f8ef11b38bfd93d7d68fbb37253a1 (diff)
Added the HERK routine, tester, and client
Diffstat (limited to 'src/clblast.cc')
-rw-r--r-- src/clblast.cc 47
1 files changed, 46 insertions, 1 deletions
diff --git a/src/clblast.cc b/src/clblast.cc
index 299d0a18..638bc944 100644
--- a/src/clblast.cc
+++ b/src/clblast.cc
@@ -27,6 +27,7 @@
#include "internal/routines/xgemm.h"
#include "internal/routines/xsymm.h"
#include "internal/routines/xsyrk.h"
+#include "internal/routines/xherk.h"
#include "internal/routines/xsyr2k.h"
#include "internal/routines/xtrmm.h"
@@ -302,6 +303,50 @@ template StatusCode Syrk<double2>(const Layout, const Triangle, const Transpose,
// =================================================================================================
+// HERK: public entry point. T is the real type used for alpha and beta; the routine object is built on std::complex<T>.
+template <typename T>
+StatusCode Herk(const Layout layout, const Triangle triangle, const Transpose a_transpose,
+ const size_t n, const size_t k, const T alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const T beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue, cl_event* event) {
+ auto queue_cpp = CommandQueue(*queue); // NOTE: 'queue' is dereferenced without a null check
+ auto event_cpp = Event(*event); // NOTE: 'event' is dereferenced without a null check
+ auto routine = Xherk<std::complex<T>,T>(queue_cpp, event_cpp); // complex data type paired with the real type T
+
+ // Loads the kernel source-code as an include (C++11 raw string literal). Each included file presumably supplies the literal and the closing ';' of the declaration above it.
+ std::string common_source1 =
+ #include "kernels/copy.opencl"
+ std::string common_source2 =
+ #include "kernels/pad.opencl"
+ std::string common_source3 =
+ #include "kernels/transpose.opencl"
+ std::string common_source4 =
+ #include "kernels/padtranspose.opencl"
+ std::string kernel_source = // the main kernel source for HERK is the xgemm source
+ #include "kernels/xgemm.opencl"
+ auto status = routine.SetUp(common_source1 + common_source2 + common_source3 + common_source4 +
+ kernel_source);
+ if (status != StatusCode::kSuccess) { return status; } // a failed set-up is returned to the caller unchanged
+
+ // Runs the routine; the raw cl_mem handles are wrapped in Buffer objects and DoHerk's status is returned as-is
+ return routine.DoHerk(layout, triangle, a_transpose, n, k, alpha,
+ Buffer(a_buffer), a_offset, a_ld, beta,
+ Buffer(c_buffer), c_offset, c_ld);
+}
+template StatusCode Herk<float>(const Layout, const Triangle, const Transpose, // explicit instantiation for T = float
+ const size_t, const size_t, const float,
+ const cl_mem, const size_t, const size_t, const float,
+ cl_mem, const size_t, const size_t,
+ cl_command_queue*, cl_event*);
+template StatusCode Herk<double>(const Layout, const Triangle, const Transpose, // explicit instantiation for T = double
+ const size_t, const size_t, const double,
+ const cl_mem, const size_t, const size_t, const double,
+ cl_mem, const size_t, const size_t,
+ cl_command_queue*, cl_event*);
+
+// =================================================================================================
+
// SYR2K
template <typename T>
StatusCode Syr2k(const Layout layout, const Triangle triangle, const Transpose ab_transpose,
@@ -456,7 +501,7 @@ StatusCode Trsm(const Layout layout, const Side side, const Triangle triangle,
Buffer(a_buffer), a_offset, a_ld,
Buffer(b_buffer), b_offset, b_ld);
*/
- return StatusCode::kSuccess;
+ return StatusCode::kNotImplemented;
}
template StatusCode Trsm<float>(const Layout, const Side, const Triangle,
const Transpose, const Diagonal,