summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- include/clblast.h 25
-rw-r--r-- src/clblast.cc 122
-rw-r--r-- test/wrapper_clblas.h 126
3 files changed, 269 insertions, 4 deletions
diff --git a/include/clblast.h b/include/clblast.h
index 231348b8..da504a0b 100644
--- a/include/clblast.h
+++ b/include/clblast.h
@@ -107,7 +107,7 @@ StatusCode Gemv(const Layout layout, const Transpose transpose_a,
// =================================================================================================
// BLAS level-3 (matrix-matrix) routines
-// Templated-precision generalized matrix-matrix multiplication: SGEMM/DGEMM
+// Templated-precision generalized matrix-matrix multiplication: SGEMM/DGEMM/CGEMM/ZGEMM
template <typename T>
StatusCode Gemm(const Layout layout, const Transpose transpose_a, const Transpose transpose_b,
const size_t m, const size_t n, const size_t k,
@@ -118,7 +118,7 @@ StatusCode Gemm(const Layout layout, const Transpose transpose_a, const Transpos
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
cl_command_queue* queue, cl_event* event);
-// Templated-precision symmetric matrix-matrix multiplication: SSYMM/DSYMM
+// Templated-precision symmetric matrix-matrix multiplication: SSYMM/DSYMM/CSYMM/ZSYMM
template <typename T>
StatusCode Symm(const Layout layout, const Side side, const Triangle triangle,
const size_t m, const size_t n,
@@ -129,6 +129,27 @@ StatusCode Symm(const Layout layout, const Side side, const Triangle triangle,
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
cl_command_queue* queue, cl_event* event);
+// Templated-precision rank-K update of a symmetric matrix: SSYRK/DSYRK/CSYRK/ZSYRK.
+template <typename T>  // T: float, double, float2 or double2 (the types instantiated in src/clblast.cc)
+StatusCode Syrk(const Layout layout, const Triangle triangle, const Transpose transpose_a,
+ const size_t n, const size_t k,  // problem sizes
+ const T alpha,  // scalar; presumably scales the A-product term (BLAS SYRK convention) -- confirm
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,  // matrix A (const handle)
+ const T beta,  // scalar; presumably scales the existing C (BLAS SYRK convention) -- confirm
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,  // matrix C (non-const handle)
+ cl_command_queue* queue, cl_event* event);  // OpenCL queue and event, both passed by pointer
+
+// Templated-precision rank-2K update of a symmetric matrix: SSYR2K/DSYR2K/CSYR2K/ZSYR2K.
+template <typename T>  // T: float, double, float2 or double2 (the types instantiated in src/clblast.cc)
+StatusCode Syr2k(const Layout layout, const Triangle triangle, const Transpose transpose_ab,
+ const size_t n, const size_t k,  // problem sizes
+ const T alpha,  // scalar; presumably scales the A/B product terms (BLAS SYR2K convention) -- confirm
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,  // matrix A (const handle)
+ const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,  // matrix B (const handle)
+ const T beta,  // scalar; presumably scales the existing C (BLAS SYR2K convention) -- confirm
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,  // matrix C (non-const handle)
+ cl_command_queue* queue, cl_event* event);  // OpenCL queue and event, both passed by pointer
+
// =================================================================================================
} // namespace clblast
diff --git a/src/clblast.cc b/src/clblast.cc
index bb0091a3..e0d085a9 100644
--- a/src/clblast.cc
+++ b/src/clblast.cc
@@ -209,7 +209,7 @@ StatusCode Symm(const Layout layout, const Side side, const Triangle triangle,
std::string kernel_source =
#include "kernels/xgemm.opencl"
auto status = routine.SetUp(common_source1 + common_source2 + common_source3 + common_source4 +
- kernel_source);
+ kernel_source);
if (status != StatusCode::kSuccess) { return status; }
// Runs the routine
@@ -244,4 +244,124 @@ template StatusCode Symm<double2>(const Layout, const Side, const Triangle,
cl_command_queue*, cl_event*);
// =================================================================================================
+
+// SYRK: API entry point for the rank-K update. NOTE: currently a stub -- the Xsyrk-based body is commented out.
+template <typename T>
+StatusCode Syrk(const Layout layout, const Triangle triangle, const Transpose transpose_a,
+ const size_t n, const size_t k, const T alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const T beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue, cl_event* event) {
+ auto queue_cpp = CommandQueue(*queue);  // dereferences queue: callers must pass a non-null pointer
+ auto event_cpp = Event(*event);  // dereferences event likewise; both wrappers are unused while the body is disabled
+ /* Disabled implementation, kept for reference; nothing inside this comment block is compiled or run.
+ auto routine = Xsyrk<T>(queue_cpp, event_cpp);
+
+ // Loads the kernel source-code as an include (C++11 raw string literal)
+ std::string common_source1 =
+ #include "kernels/copy.opencl"
+ std::string common_source2 =
+ #include "kernels/pad.opencl"
+ std::string common_source3 =
+ #include "kernels/transpose.opencl"
+ std::string common_source4 =
+ #include "kernels/padtranspose.opencl"
+ std::string kernel_source =
+ #include "kernels/xgemm.opencl"
+ auto status = routine.SetUp(common_source1 + common_source2 + common_source3 + common_source4 +
+ kernel_source);
+ if (status != StatusCode::kSuccess) { return status; }
+
+ // Runs the routine
+ return routine.DoSyrk(layout, triangle, transpose_a, n, k, alpha,
+ Buffer(a_buffer), a_offset, a_ld, beta,
+ Buffer(c_buffer), c_offset, c_ld);
+ */
+ return StatusCode::kSuccess;  // stub result: success is reported although no routine was run
+}
+template StatusCode Syrk<float>(const Layout, const Triangle, const Transpose,  // explicit instantiation: float
+ const size_t, const size_t, const float,
+ const cl_mem, const size_t, const size_t, const float,
+ cl_mem, const size_t, const size_t,
+ cl_command_queue*, cl_event*);
+template StatusCode Syrk<double>(const Layout, const Triangle, const Transpose,  // explicit instantiation: double
+ const size_t, const size_t, const double,
+ const cl_mem, const size_t, const size_t, const double,
+ cl_mem, const size_t, const size_t,
+ cl_command_queue*, cl_event*);
+template StatusCode Syrk<float2>(const Layout, const Triangle, const Transpose,  // explicit instantiation: float2
+ const size_t, const size_t, const float2,
+ const cl_mem, const size_t, const size_t, const float2,
+ cl_mem, const size_t, const size_t,
+ cl_command_queue*, cl_event*);
+template StatusCode Syrk<double2>(const Layout, const Triangle, const Transpose,  // explicit instantiation: double2
+ const size_t, const size_t, const double2,
+ const cl_mem, const size_t, const size_t, const double2,
+ cl_mem, const size_t, const size_t,
+ cl_command_queue*, cl_event*);
+
+// =================================================================================================
+
+// SYR2K: API entry point for the rank-2K update. NOTE: currently a stub -- the Xsyr2k-based body is commented out.
+template <typename T>
+StatusCode Syr2k(const Layout layout, const Triangle triangle, const Transpose transpose_ab,
+ const size_t n, const size_t k, const T alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const T beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue, cl_event* event) {
+ auto queue_cpp = CommandQueue(*queue);  // dereferences queue: callers must pass a non-null pointer
+ auto event_cpp = Event(*event);  // dereferences event likewise; both wrappers are unused while the body is disabled
+ /* Disabled implementation, kept for reference; nothing inside this comment block is compiled or run.
+ auto routine = Xsyr2k<T>(queue_cpp, event_cpp);
+
+ // Loads the kernel source-code as an include (C++11 raw string literal)
+ std::string common_source1 =
+ #include "kernels/copy.opencl"
+ std::string common_source2 =
+ #include "kernels/pad.opencl"
+ std::string common_source3 =
+ #include "kernels/transpose.opencl"
+ std::string common_source4 =
+ #include "kernels/padtranspose.opencl"
+ std::string kernel_source =
+ #include "kernels/xgemm.opencl"
+ auto status = routine.SetUp(common_source1 + common_source2 + common_source3 + common_source4 +
+ kernel_source);
+ if (status != StatusCode::kSuccess) { return status; }
+
+ // Runs the routine
+ return routine.DoSyr2k(layout, triangle, transpose_ab, n, k, alpha,
+ Buffer(a_buffer), a_offset, a_ld,
+ Buffer(b_buffer), b_offset, b_ld, beta,
+ Buffer(c_buffer), c_offset, c_ld);
+ */
+ return StatusCode::kSuccess;  // stub result: success is reported although no routine was run
+}
+template StatusCode Syr2k<float>(const Layout, const Triangle, const Transpose,  // explicit instantiation: float
+ const size_t, const size_t, const float,
+ const cl_mem, const size_t, const size_t,
+ const cl_mem, const size_t, const size_t, const float,
+ cl_mem, const size_t, const size_t,
+ cl_command_queue*, cl_event*);
+template StatusCode Syr2k<double>(const Layout, const Triangle, const Transpose,  // explicit instantiation: double
+ const size_t, const size_t, const double,
+ const cl_mem, const size_t, const size_t,
+ const cl_mem, const size_t, const size_t, const double,
+ cl_mem, const size_t, const size_t,
+ cl_command_queue*, cl_event*);
+template StatusCode Syr2k<float2>(const Layout, const Triangle, const Transpose,  // explicit instantiation: float2
+ const size_t, const size_t, const float2,
+ const cl_mem, const size_t, const size_t,
+ const cl_mem, const size_t, const size_t, const float2,
+ cl_mem, const size_t, const size_t,
+ cl_command_queue*, cl_event*);
+template StatusCode Syr2k<double2>(const Layout, const Triangle, const Transpose,  // explicit instantiation: double2
+ const size_t, const size_t, const double2,
+ const cl_mem, const size_t, const size_t,
+ const cl_mem, const size_t, const size_t, const double2,
+ cl_mem, const size_t, const size_t,
+ cl_command_queue*, cl_event*);
+
+// =================================================================================================
} // namespace clblast
diff --git a/test/wrapper_clblas.h b/test/wrapper_clblas.h
index 093a8742..d6df0835 100644
--- a/test/wrapper_clblas.h
+++ b/test/wrapper_clblas.h
@@ -201,7 +201,7 @@ clblasStatus clblasXgemm(
num_queues, queues, num_wait_events, wait_events, events);
}
-// This calls {clblasSsymm, clblasDsymm} with the arguments forwarded.
+// This calls {clblasSsymm, clblasDsymm, clblasCsymm, clblasZsymm} with the arguments forwarded.
clblasStatus clblasXsymm(
clblasOrder layout, clblasSide side, clblasUplo triangle,
size_t m, size_t n, float alpha,
@@ -267,6 +267,130 @@ clblasStatus clblasXsymm(
num_queues, queues, num_wait_events, wait_events, events);
}
+// This calls {clblasSsyrk, clblasDsyrk, clblasCsyrk, clblasZsyrk} with the arguments forwarded. The overload is selected by the type of alpha/beta.
+clblasStatus clblasXsyrk(
+ clblasOrder layout, clblasUplo triangle, clblasTranspose tran_a,
+ size_t n, size_t k, float alpha,
+ const cl_mem a_mat, size_t a_offset, size_t a_ld, float beta,
+ cl_mem c_mat, size_t c_offset, size_t c_ld,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ return clblasSsyrk(layout, triangle, tran_a,  // float overload: forwards everything unchanged
+ n, k, alpha,
+ a_mat, a_offset, a_ld, beta,
+ c_mat, c_offset, c_ld,
+ num_queues, queues, num_wait_events, wait_events, events);
+}
+clblasStatus clblasXsyrk(
+ clblasOrder layout, clblasUplo triangle, clblasTranspose tran_a,
+ size_t n, size_t k, double alpha,
+ const cl_mem a_mat, size_t a_offset, size_t a_ld, double beta,
+ cl_mem c_mat, size_t c_offset, size_t c_ld,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ return clblasDsyrk(layout, triangle, tran_a,  // double overload: forwards everything unchanged
+ n, k, alpha,
+ a_mat, a_offset, a_ld, beta,
+ c_mat, c_offset, c_ld,
+ num_queues, queues, num_wait_events, wait_events, events);
+}
+clblasStatus clblasXsyrk(
+ clblasOrder layout, clblasUplo triangle, clblasTranspose tran_a,
+ size_t n, size_t k, float2 alpha,
+ const cl_mem a_mat, size_t a_offset, size_t a_ld, float2 beta,
+ cl_mem c_mat, size_t c_offset, size_t c_ld,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ auto cl_alpha = cl_float2{{alpha.real(), alpha.imag()}};  // repack real/imag parts into a cl_float2
+ auto cl_beta = cl_float2{{beta.real(), beta.imag()}};  // same repacking for beta
+ return clblasCsyrk(layout, triangle, tran_a,  // float2 overload: only alpha/beta are converted
+ n, k, cl_alpha,
+ a_mat, a_offset, a_ld, cl_beta,
+ c_mat, c_offset, c_ld,
+ num_queues, queues, num_wait_events, wait_events, events);
+}
+clblasStatus clblasXsyrk(
+ clblasOrder layout, clblasUplo triangle, clblasTranspose tran_a,
+ size_t n, size_t k, double2 alpha,
+ const cl_mem a_mat, size_t a_offset, size_t a_ld, double2 beta,
+ cl_mem c_mat, size_t c_offset, size_t c_ld,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ auto cl_alpha = cl_double2{{alpha.real(), alpha.imag()}};  // repack real/imag parts into a cl_double2
+ auto cl_beta = cl_double2{{beta.real(), beta.imag()}};  // same repacking for beta
+ return clblasZsyrk(layout, triangle, tran_a,  // double2 overload: only alpha/beta are converted
+ n, k, cl_alpha,
+ a_mat, a_offset, a_ld, cl_beta,
+ c_mat, c_offset, c_ld,
+ num_queues, queues, num_wait_events, wait_events, events);
+}
+
+// This calls {clblasSsyr2k, clblasDsyr2k, clblasCsyr2k, clblasZsyr2k} with the arguments forwarded. The overload is selected by the type of alpha/beta.
+clblasStatus clblasXsyr2k(
+ clblasOrder layout, clblasUplo triangle, clblasTranspose tran_ab,
+ size_t n, size_t k, float alpha,
+ const cl_mem a_mat, size_t a_offset, size_t a_ld,
+ const cl_mem b_mat, size_t b_offset, size_t b_ld, float beta,
+ cl_mem c_mat, size_t c_offset, size_t c_ld,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ return clblasSsyr2k(layout, triangle, tran_ab,  // float overload: forwards everything unchanged
+ n, k, alpha,
+ a_mat, a_offset, a_ld,
+ b_mat, b_offset, b_ld, beta,
+ c_mat, c_offset, c_ld,
+ num_queues, queues, num_wait_events, wait_events, events);
+}
+clblasStatus clblasXsyr2k(
+ clblasOrder layout, clblasUplo triangle, clblasTranspose tran_ab,
+ size_t n, size_t k, double alpha,
+ const cl_mem a_mat, size_t a_offset, size_t a_ld,
+ const cl_mem b_mat, size_t b_offset, size_t b_ld, double beta,
+ cl_mem c_mat, size_t c_offset, size_t c_ld,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ return clblasDsyr2k(layout, triangle, tran_ab,  // double overload: forwards everything unchanged
+ n, k, alpha,
+ a_mat, a_offset, a_ld,
+ b_mat, b_offset, b_ld, beta,
+ c_mat, c_offset, c_ld,
+ num_queues, queues, num_wait_events, wait_events, events);
+}
+clblasStatus clblasXsyr2k(
+ clblasOrder layout, clblasUplo triangle, clblasTranspose tran_ab,
+ size_t n, size_t k, float2 alpha,
+ const cl_mem a_mat, size_t a_offset, size_t a_ld,
+ const cl_mem b_mat, size_t b_offset, size_t b_ld, float2 beta,
+ cl_mem c_mat, size_t c_offset, size_t c_ld,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ auto cl_alpha = cl_float2{{alpha.real(), alpha.imag()}};  // repack real/imag parts into a cl_float2
+ auto cl_beta = cl_float2{{beta.real(), beta.imag()}};  // same repacking for beta
+ return clblasCsyr2k(layout, triangle, tran_ab,  // float2 overload: only alpha/beta are converted
+ n, k, cl_alpha,
+ a_mat, a_offset, a_ld,
+ b_mat, b_offset, b_ld, cl_beta,
+ c_mat, c_offset, c_ld,
+ num_queues, queues, num_wait_events, wait_events, events);
+}
+clblasStatus clblasXsyr2k(
+ clblasOrder layout, clblasUplo triangle, clblasTranspose tran_ab,
+ size_t n, size_t k, double2 alpha,
+ const cl_mem a_mat, size_t a_offset, size_t a_ld,
+ const cl_mem b_mat, size_t b_offset, size_t b_ld, double2 beta,
+ cl_mem c_mat, size_t c_offset, size_t c_ld,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ auto cl_alpha = cl_double2{{alpha.real(), alpha.imag()}};  // repack real/imag parts into a cl_double2
+ auto cl_beta = cl_double2{{beta.real(), beta.imag()}};  // same repacking for beta
+ return clblasZsyr2k(layout, triangle, tran_ab,  // double2 overload: only alpha/beta are converted
+ n, k, cl_alpha,
+ a_mat, a_offset, a_ld,
+ b_mat, b_offset, b_ld, cl_beta,
+ c_mat, c_offset, c_ld,
+ num_queues, queues, num_wait_events, wait_events, events);
+}
+
// =================================================================================================
} // namespace clblast