diff options
author | CNugteren <web@cedricnugteren.nl> | 2015-06-21 12:44:03 +0200 |
---|---|---|
committer | CNugteren <web@cedricnugteren.nl> | 2015-06-21 12:44:03 +0200 |
commit | e3829c1067814c0aa83ab440fa431d98837aeeda (patch) | |
tree | 18516e3bc42bc71746312ea9efbcd677d5d71e2b | |
parent | ea7da6a49758af50302be040ab7a97a7a8c0f692 (diff) |
Added prototypes of SYRK and SYR2K
-rw-r--r-- | include/clblast.h | 25 | ||||
-rw-r--r-- | src/clblast.cc | 122 | ||||
-rw-r--r-- | test/wrapper_clblas.h | 126 |
3 files changed, 269 insertions, 4 deletions
diff --git a/include/clblast.h b/include/clblast.h index 231348b8..da504a0b 100644 --- a/include/clblast.h +++ b/include/clblast.h @@ -107,7 +107,7 @@ StatusCode Gemv(const Layout layout, const Transpose transpose_a, // ================================================================================================= // BLAS level-3 (matrix-matrix) routines -// Templated-precision generalized matrix-matrix multiplication: SGEMM/DGEMM +// Templated-precision generalized matrix-matrix multiplication: SGEMM/DGEMM/CGEMM/ZGEMM template <typename T> StatusCode Gemm(const Layout layout, const Transpose transpose_a, const Transpose transpose_b, const size_t m, const size_t n, const size_t k, @@ -118,7 +118,7 @@ StatusCode Gemm(const Layout layout, const Transpose transpose_a, const Transpos cl_mem c_buffer, const size_t c_offset, const size_t c_ld, cl_command_queue* queue, cl_event* event); -// Templated-precision symmetric matrix-matrix multiplication: SSYMM/DSYMM +// Templated-precision symmetric matrix-matrix multiplication: SSYMM/DSYMM/CSYMM/ZSYMM template <typename T> StatusCode Symm(const Layout layout, const Side side, const Triangle triangle, const size_t m, const size_t n, @@ -129,6 +129,27 @@ StatusCode Symm(const Layout layout, const Side side, const Triangle triangle, cl_mem c_buffer, const size_t c_offset, const size_t c_ld, cl_command_queue* queue, cl_event* event); +// Templated-precision rank-K update of a symmetric matrix: SSYRK/DSYRK/CSYRK/ZSYRK +template <typename T> +StatusCode Syrk(const Layout layout, const Triangle triangle, const Transpose transpose_a, + const size_t n, const size_t k, + const T alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const T beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_command_queue* queue, cl_event* event); + +// Templated-precision rank-2K update of a symmetric matrix: SSYR2K/DSYR2K/CSYR2K/ZSYR2K +template <typename T> +StatusCode Syr2k(const Layout layout, const Triangle triangle, const Transpose transpose_ab, + const size_t n, const size_t k, + const T alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + const T beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_command_queue* queue, cl_event* event); + // ================================================================================================= } // namespace clblast diff --git a/src/clblast.cc b/src/clblast.cc index bb0091a3..e0d085a9 100644 --- a/src/clblast.cc +++ b/src/clblast.cc @@ -209,7 +209,7 @@ StatusCode Symm(const Layout layout, const Side side, const Triangle triangle, std::string kernel_source = #include "kernels/xgemm.opencl" auto status = routine.SetUp(common_source1 + common_source2 + common_source3 + common_source4 + - kernel_source); + kernel_source); if (status != StatusCode::kSuccess) { return status; } // Runs the routine @@ -244,4 +244,124 @@ template StatusCode Symm<double2>(const Layout, const Side, const Triangle, cl_command_queue*, cl_event*); // ================================================================================================= + +// SYRK +template <typename T> +StatusCode Syrk(const Layout layout, const Triangle triangle, const Transpose transpose_a, + const size_t n, const size_t k, const T alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const T beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_command_queue* queue, cl_event* event) { + auto queue_cpp = CommandQueue(*queue); + auto event_cpp = Event(*event); + /* + auto routine = Xsyrk<T>(queue_cpp, event_cpp); + + // Loads the kernel source-code as an include (C++11 raw string literal) + std::string common_source1 = + #include "kernels/copy.opencl" + std::string common_source2 = + #include "kernels/pad.opencl" + std::string common_source3 = + #include "kernels/transpose.opencl" + std::string common_source4 = + #include "kernels/padtranspose.opencl" + std::string kernel_source = + #include "kernels/xgemm.opencl" + auto status = routine.SetUp(common_source1 + common_source2 + common_source3 + common_source4 + + kernel_source); + if (status != StatusCode::kSuccess) { return status; } + + // Runs the routine + return routine.DoSyrk(layout, triangle, transpose_a, n, k, alpha, + Buffer(a_buffer), a_offset, a_ld, beta, + Buffer(c_buffer), c_offset, c_ld); + */ + return StatusCode::kSuccess; +} +template StatusCode Syrk<float>(const Layout, const Triangle, const Transpose, + const size_t, const size_t, const float, + const cl_mem, const size_t, const size_t, const float, + cl_mem, const size_t, const size_t, + cl_command_queue*, cl_event*); +template StatusCode Syrk<double>(const Layout, const Triangle, const Transpose, + const size_t, const size_t, const double, + const cl_mem, const size_t, const size_t, const double, + cl_mem, const size_t, const size_t, + cl_command_queue*, cl_event*); +template StatusCode Syrk<float2>(const Layout, const Triangle, const Transpose, + const size_t, const size_t, const float2, + const cl_mem, const size_t, const size_t, const float2, + cl_mem, const size_t, const size_t, + cl_command_queue*, cl_event*); +template StatusCode Syrk<double2>(const Layout, const Triangle, const Transpose, + const size_t, const size_t, const double2, + const cl_mem, const size_t, const size_t, const double2, + cl_mem, const size_t, const size_t, + cl_command_queue*, cl_event*); + +// ================================================================================================= + +// SYR2K +template <typename T> +StatusCode Syr2k(const Layout layout, const Triangle triangle, const Transpose transpose_ab, + const size_t n, const size_t k, const T alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const T beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_command_queue* queue, cl_event* event) { + auto queue_cpp = CommandQueue(*queue); + auto event_cpp = Event(*event); + /* + auto routine = Xsyr2k<T>(queue_cpp, event_cpp); + + // Loads the kernel source-code as an include (C++11 raw string literal) + std::string common_source1 = + #include "kernels/copy.opencl" + std::string common_source2 = + #include "kernels/pad.opencl" + std::string common_source3 = + #include "kernels/transpose.opencl" + std::string common_source4 = + #include "kernels/padtranspose.opencl" + std::string kernel_source = + #include "kernels/xgemm.opencl" + auto status = routine.SetUp(common_source1 + common_source2 + common_source3 + common_source4 + + kernel_source); + if (status != StatusCode::kSuccess) { return status; } + + // Runs the routine + return routine.DoSyr2k(layout, triangle, transpose_ab, n, k, alpha, + Buffer(a_buffer), a_offset, a_ld, + Buffer(b_buffer), b_offset, b_ld, beta, + Buffer(c_buffer), c_offset, c_ld); + */ + return StatusCode::kSuccess; +} +template StatusCode Syr2k<float>(const Layout, const Triangle, const Transpose, + const size_t, const size_t, const float, + const cl_mem, const size_t, const size_t, + const cl_mem, const size_t, const size_t, const float, + cl_mem, const size_t, const size_t, + cl_command_queue*, cl_event*); +template StatusCode Syr2k<double>(const Layout, const Triangle, const Transpose, + const size_t, const size_t, const double, + const cl_mem, const size_t, const size_t, + const cl_mem, const size_t, const size_t, const double, + cl_mem, const size_t, const size_t, + cl_command_queue*, cl_event*); +template StatusCode Syr2k<float2>(const Layout, const Triangle, const Transpose, + const size_t, const size_t, const float2, + const cl_mem, const size_t, const size_t, + const cl_mem, const size_t, const size_t, const float2, + cl_mem, const size_t, const size_t, + cl_command_queue*, cl_event*); +template StatusCode Syr2k<double2>(const Layout, const Triangle, const Transpose, + const size_t, const size_t, const double2, + const cl_mem, const size_t, const size_t, + const cl_mem, const size_t, const size_t, const double2, + cl_mem, const size_t, const size_t, + cl_command_queue*, cl_event*); + +// ================================================================================================= } // namespace clblast diff --git a/test/wrapper_clblas.h b/test/wrapper_clblas.h index 093a8742..d6df0835 100644 --- a/test/wrapper_clblas.h +++ b/test/wrapper_clblas.h @@ -201,7 +201,7 @@ clblasStatus clblasXgemm( num_queues, queues, num_wait_events, wait_events, events); } -// This calls {clblasSsymm, clblasDsymm} with the arguments forwarded. +// This calls {clblasSsymm, clblasDsymm, clblasCsymm, clblasZsymm} with the arguments forwarded. clblasStatus clblasXsymm( clblasOrder layout, clblasSide side, clblasUplo triangle, size_t m, size_t n, float alpha, @@ -267,6 +267,130 @@ clblasStatus clblasXsymm( num_queues, queues, num_wait_events, wait_events, events); } +// This calls {clblasSsyrk, clblasDsyrk, clblasCsyrk, clblasZsyrk} with the arguments forwarded. +clblasStatus clblasXsyrk( + clblasOrder layout, clblasUplo triangle, clblasTranspose tran_a, + size_t n, size_t k, float alpha, + const cl_mem a_mat, size_t a_offset, size_t a_ld, float beta, + cl_mem c_mat, size_t c_offset, size_t c_ld, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasSsyrk(layout, triangle, tran_a, + n, k, alpha, + a_mat, a_offset, a_ld, beta, + c_mat, c_offset, c_ld, + num_queues, queues, num_wait_events, wait_events, events); +} +clblasStatus clblasXsyrk( + clblasOrder layout, clblasUplo triangle, clblasTranspose tran_a, + size_t n, size_t k, double alpha, + const cl_mem a_mat, size_t a_offset, size_t a_ld, double beta, + cl_mem c_mat, size_t c_offset, size_t c_ld, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasDsyrk(layout, triangle, tran_a, + n, k, alpha, + a_mat, a_offset, a_ld, beta, + c_mat, c_offset, c_ld, + num_queues, queues, num_wait_events, wait_events, events); +} +clblasStatus clblasXsyrk( + clblasOrder layout, clblasUplo triangle, clblasTranspose tran_a, + size_t n, size_t k, float2 alpha, + const cl_mem a_mat, size_t a_offset, size_t a_ld, float2 beta, + cl_mem c_mat, size_t c_offset, size_t c_ld, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + auto cl_alpha = cl_float2{{alpha.real(), alpha.imag()}}; + auto cl_beta = cl_float2{{beta.real(), beta.imag()}}; + return clblasCsyrk(layout, triangle, tran_a, + n, k, cl_alpha, + a_mat, a_offset, a_ld, cl_beta, + c_mat, c_offset, c_ld, + num_queues, queues, num_wait_events, wait_events, events); +} +clblasStatus clblasXsyrk( + clblasOrder layout, clblasUplo triangle, clblasTranspose tran_a, + size_t n, size_t k, double2 alpha, + const cl_mem a_mat, size_t a_offset, size_t a_ld, double2 beta, + cl_mem c_mat, size_t c_offset, size_t c_ld, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + auto cl_alpha = cl_double2{{alpha.real(), alpha.imag()}}; + auto cl_beta = cl_double2{{beta.real(), beta.imag()}}; + return clblasZsyrk(layout, triangle, tran_a, + n, k, cl_alpha, + a_mat, a_offset, a_ld, cl_beta, + c_mat, c_offset, c_ld, + num_queues, queues, num_wait_events, wait_events, events); +} + +// This calls {clblasSsyr2k, clblasDsyr2k, clblasCsyr2k, clblasZsyr2k} with the arguments forwarded. +clblasStatus clblasXsyr2k( + clblasOrder layout, clblasUplo triangle, clblasTranspose tran_ab, + size_t n, size_t k, float alpha, + const cl_mem a_mat, size_t a_offset, size_t a_ld, + const cl_mem b_mat, size_t b_offset, size_t b_ld, float beta, + cl_mem c_mat, size_t c_offset, size_t c_ld, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasSsyr2k(layout, triangle, tran_ab, + n, k, alpha, + a_mat, a_offset, a_ld, + b_mat, b_offset, b_ld, beta, + c_mat, c_offset, c_ld, + num_queues, queues, num_wait_events, wait_events, events); +} +clblasStatus clblasXsyr2k( + clblasOrder layout, clblasUplo triangle, clblasTranspose tran_ab, + size_t n, size_t k, double alpha, + const cl_mem a_mat, size_t a_offset, size_t a_ld, + const cl_mem b_mat, size_t b_offset, size_t b_ld, double beta, + cl_mem c_mat, size_t c_offset, size_t c_ld, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasDsyr2k(layout, triangle, tran_ab, + n, k, alpha, + a_mat, a_offset, a_ld, + b_mat, b_offset, b_ld, beta, + c_mat, c_offset, c_ld, + num_queues, queues, num_wait_events, wait_events, events); +} +clblasStatus clblasXsyr2k( + clblasOrder layout, clblasUplo triangle, clblasTranspose tran_ab, + size_t n, size_t k, float2 alpha, + const cl_mem a_mat, size_t a_offset, size_t a_ld, + const cl_mem b_mat, size_t b_offset, size_t b_ld, float2 beta, + cl_mem c_mat, size_t c_offset, size_t c_ld, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + auto cl_alpha = cl_float2{{alpha.real(), alpha.imag()}}; + auto cl_beta = cl_float2{{beta.real(), beta.imag()}}; + return clblasCsyr2k(layout, triangle, tran_ab, + n, k, cl_alpha, + a_mat, a_offset, a_ld, + b_mat, b_offset, b_ld, cl_beta, + c_mat, c_offset, c_ld, + num_queues, queues, num_wait_events, wait_events, events); +} +clblasStatus clblasXsyr2k( + clblasOrder layout, clblasUplo triangle, clblasTranspose tran_ab, + size_t n, size_t k, double2 alpha, + const cl_mem a_mat, size_t a_offset, size_t a_ld, + const cl_mem b_mat, size_t b_offset, size_t b_ld, double2 beta, + cl_mem c_mat, size_t c_offset, size_t c_ld, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + auto cl_alpha = cl_double2{{alpha.real(), alpha.imag()}}; + auto cl_beta = cl_double2{{beta.real(), beta.imag()}}; + return clblasZsyr2k(layout, triangle, tran_ab, + n, k, cl_alpha, + a_mat, a_offset, a_ld, + b_mat, b_offset, b_ld, cl_beta, + c_mat, c_offset, c_ld, + num_queues, queues, num_wait_events, wait_events, events); +} + // ================================================================================================= } // namespace clblast |