summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author CNugteren <web@cedricnugteren.nl> 2015-06-21 12:44:03 +0200
committer CNugteren <web@cedricnugteren.nl> 2015-06-21 12:44:03 +0200
commit e3829c1067814c0aa83ab440fa431d98837aeeda (patch)
tree 18516e3bc42bc71746312ea9efbcd677d5d71e2b /src
parent ea7da6a49758af50302be040ab7a97a7a8c0f692 (diff)
Added prototypes of SYRK and SYR2K
Diffstat (limited to 'src')
-rw-r--r-- src/clblast.cc 122
1 files changed, 121 insertions, 1 deletions
diff --git a/src/clblast.cc b/src/clblast.cc
index bb0091a3..e0d085a9 100644
--- a/src/clblast.cc
+++ b/src/clblast.cc
@@ -209,7 +209,7 @@ StatusCode Symm(const Layout layout, const Side side, const Triangle triangle,
std::string kernel_source =
#include "kernels/xgemm.opencl"
auto status = routine.SetUp(common_source1 + common_source2 + common_source3 + common_source4 +
- kernel_source);
+ kernel_source);
if (status != StatusCode::kSuccess) { return status; }
// Runs the routine
@@ -244,4 +244,124 @@ template StatusCode Symm<double2>(const Layout, const Side, const Triangle,
cl_command_queue*, cl_event*);
// =================================================================================================
+
+// SYRK: prototype entry point, explicitly instantiated below for float, double, float2 and double2
+template <typename T>
+StatusCode Syrk(const Layout layout, const Triangle triangle, const Transpose transpose_a,
+ const size_t n, const size_t k, const T alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const T beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue, cl_event* event) {
+ auto queue_cpp = CommandQueue(*queue);
+ auto event_cpp = Event(*event);
+ /*
+ NOTE: prototype only. Everything inside this block comment is disabled. It sketches the intended
+ implementation: create an Xsyrk<T> routine from queue_cpp and event_cpp, set it up with the
+ concatenated kernel sources, and forward all arguments to DoSyrk. While it stays disabled,
+ queue_cpp and event_cpp are never used and the function returns StatusCode::kSuccess
+ unconditionally, without touching any of the buffers.
+ NOTE(review): 'queue' and 'event' are dereferenced above without a null check; confirm that
+ callers always pass valid pointers.
+
+ auto routine = Xsyrk<T>(queue_cpp, event_cpp);
+
+ // Loads the kernel source-code as an include (C++11 raw string literal)
+ std::string common_source1 =
+ #include "kernels/copy.opencl"
+ std::string common_source2 =
+ #include "kernels/pad.opencl"
+ std::string common_source3 =
+ #include "kernels/transpose.opencl"
+ std::string common_source4 =
+ #include "kernels/padtranspose.opencl"
+ std::string kernel_source =
+ #include "kernels/xgemm.opencl"
+ auto status = routine.SetUp(common_source1 + common_source2 + common_source3 + common_source4 +
+ kernel_source);
+ if (status != StatusCode::kSuccess) { return status; }
+
+ // Runs the routine
+ return routine.DoSyrk(layout, triangle, transpose_a, n, k, alpha,
+ Buffer(a_buffer), a_offset, a_ld, beta,
+ Buffer(c_buffer), c_offset, c_ld);
+ */
+ return StatusCode::kSuccess;
+}
+template StatusCode Syrk<float>(const Layout, const Triangle, const Transpose,
+ const size_t, const size_t, const float,
+ const cl_mem, const size_t, const size_t, const float,
+ cl_mem, const size_t, const size_t,
+ cl_command_queue*, cl_event*);
+template StatusCode Syrk<double>(const Layout, const Triangle, const Transpose,
+ const size_t, const size_t, const double,
+ const cl_mem, const size_t, const size_t, const double,
+ cl_mem, const size_t, const size_t,
+ cl_command_queue*, cl_event*);
+template StatusCode Syrk<float2>(const Layout, const Triangle, const Transpose,
+ const size_t, const size_t, const float2,
+ const cl_mem, const size_t, const size_t, const float2,
+ cl_mem, const size_t, const size_t,
+ cl_command_queue*, cl_event*);
+template StatusCode Syrk<double2>(const Layout, const Triangle, const Transpose,
+ const size_t, const size_t, const double2,
+ const cl_mem, const size_t, const size_t, const double2,
+ cl_mem, const size_t, const size_t,
+ cl_command_queue*, cl_event*);
+
+// =================================================================================================
+
+// SYR2K: prototype entry point, explicitly instantiated below for float, double, float2 and double2
+template <typename T>
+StatusCode Syr2k(const Layout layout, const Triangle triangle, const Transpose transpose_ab,
+ const size_t n, const size_t k, const T alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const T beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue, cl_event* event) {
+ auto queue_cpp = CommandQueue(*queue);
+ auto event_cpp = Event(*event);
+ /*
+ NOTE: prototype only. Everything inside this block comment is disabled. It sketches the intended
+ implementation: create an Xsyr2k<T> routine from queue_cpp and event_cpp, set it up with the
+ concatenated kernel sources, and forward all arguments to DoSyr2k. While it stays disabled,
+ queue_cpp and event_cpp are never used and the function returns StatusCode::kSuccess
+ unconditionally, without touching any of the buffers.
+ NOTE(review): 'queue' and 'event' are dereferenced above without a null check; confirm that
+ callers always pass valid pointers.
+
+ auto routine = Xsyr2k<T>(queue_cpp, event_cpp);
+
+ // Loads the kernel source-code as an include (C++11 raw string literal)
+ std::string common_source1 =
+ #include "kernels/copy.opencl"
+ std::string common_source2 =
+ #include "kernels/pad.opencl"
+ std::string common_source3 =
+ #include "kernels/transpose.opencl"
+ std::string common_source4 =
+ #include "kernels/padtranspose.opencl"
+ std::string kernel_source =
+ #include "kernels/xgemm.opencl"
+ auto status = routine.SetUp(common_source1 + common_source2 + common_source3 + common_source4 +
+ kernel_source);
+ if (status != StatusCode::kSuccess) { return status; }
+
+ // Runs the routine
+ return routine.DoSyr2k(layout, triangle, transpose_ab, n, k, alpha,
+ Buffer(a_buffer), a_offset, a_ld,
+ Buffer(b_buffer), b_offset, b_ld, beta,
+ Buffer(c_buffer), c_offset, c_ld);
+ */
+ return StatusCode::kSuccess;
+}
+template StatusCode Syr2k<float>(const Layout, const Triangle, const Transpose,
+ const size_t, const size_t, const float,
+ const cl_mem, const size_t, const size_t,
+ const cl_mem, const size_t, const size_t, const float,
+ cl_mem, const size_t, const size_t,
+ cl_command_queue*, cl_event*);
+template StatusCode Syr2k<double>(const Layout, const Triangle, const Transpose,
+ const size_t, const size_t, const double,
+ const cl_mem, const size_t, const size_t,
+ const cl_mem, const size_t, const size_t, const double,
+ cl_mem, const size_t, const size_t,
+ cl_command_queue*, cl_event*);
+template StatusCode Syr2k<float2>(const Layout, const Triangle, const Transpose,
+ const size_t, const size_t, const float2,
+ const cl_mem, const size_t, const size_t,
+ const cl_mem, const size_t, const size_t, const float2,
+ cl_mem, const size_t, const size_t,
+ cl_command_queue*, cl_event*);
+template StatusCode Syr2k<double2>(const Layout, const Triangle, const Transpose,
+ const size_t, const size_t, const double2,
+ const cl_mem, const size_t, const size_t,
+ const cl_mem, const size_t, const size_t, const double2,
+ cl_mem, const size_t, const size_t,
+ cl_command_queue*, cl_event*);
+
+// =================================================================================================
} // namespace clblast