summaryrefslogtreecommitdiff
path: root/src/clblast.cc
diff options
context:
space:
mode:
authorCedric Nugteren <web@cedricnugteren.nl>2016-06-16 18:07:46 +0200
committerCedric Nugteren <web@cedricnugteren.nl>2016-06-16 18:07:46 +0200
commit52ccaf5b25e14c9ce032315e5e96b1f27886d481 (patch)
tree087288b7aebf2a06ffc4e7dcbcd4353f7a3be6a7 /src/clblast.cc
parent39b7dbc5e37829abfbcfb77852b9138b31540b42 (diff)
Added XOMATCOPY routines to perform out-of-place matrix scaling, copying, and/or transposing
Diffstat (limited to 'src/clblast.cc')
-rw-r--r--src/clblast.cc56
1 files changed, 56 insertions, 0 deletions
diff --git a/src/clblast.cc b/src/clblast.cc
index 07322327..e3df6ede 100644
--- a/src/clblast.cc
+++ b/src/clblast.cc
@@ -68,6 +68,9 @@
#include "internal/routines/level3/xher2k.h"
#include "internal/routines/level3/xtrmm.h"
+// Extra includes (level-x)
+#include "internal/routines/levelx/xomatcopy.h"
+
namespace clblast {
// =================================================================================================
@@ -2062,6 +2065,59 @@ template StatusCode PUBLIC_API Trsm<half>(const Layout, const Side, const Triang
cl_command_queue*, cl_event*);
// =================================================================================================
+// Extra non-BLAS routines (level-X)
+// =================================================================================================
+
+// Scaling and out-place transpose/copy (non-BLAS function): SOMATCOPY/DOMATCOPY/COMATCOPY/ZOMATCOPY/HOMATCOPY
+template <typename T>
+StatusCode Omatcopy(const Layout layout, const Transpose a_transpose,
+ const size_t m, const size_t n,
+ const T alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ cl_command_queue* queue, cl_event* event) {
+ auto queue_cpp = Queue(*queue);
+ auto routine = Xomatcopy<T>(queue_cpp, event);
+ auto status = routine.SetUp();
+ if (status != StatusCode::kSuccess) { return status; }
+ return routine.DoOmatcopy(layout, a_transpose,
+ m, n,
+ alpha,
+ Buffer<T>(a_buffer), a_offset, a_ld,
+ Buffer<T>(b_buffer), b_offset, b_ld);
+}
+template StatusCode PUBLIC_API Omatcopy<float>(const Layout, const Transpose,
+ const size_t, const size_t,
+ const float,
+ const cl_mem, const size_t, const size_t,
+ cl_mem, const size_t, const size_t,
+ cl_command_queue*, cl_event*);
+template StatusCode PUBLIC_API Omatcopy<double>(const Layout, const Transpose,
+ const size_t, const size_t,
+ const double,
+ const cl_mem, const size_t, const size_t,
+ cl_mem, const size_t, const size_t,
+ cl_command_queue*, cl_event*);
+template StatusCode PUBLIC_API Omatcopy<float2>(const Layout, const Transpose,
+ const size_t, const size_t,
+ const float2,
+ const cl_mem, const size_t, const size_t,
+ cl_mem, const size_t, const size_t,
+ cl_command_queue*, cl_event*);
+template StatusCode PUBLIC_API Omatcopy<double2>(const Layout, const Transpose,
+ const size_t, const size_t,
+ const double2,
+ const cl_mem, const size_t, const size_t,
+ cl_mem, const size_t, const size_t,
+ cl_command_queue*, cl_event*);
+template StatusCode PUBLIC_API Omatcopy<half>(const Layout, const Transpose,
+ const size_t, const size_t,
+ const half,
+ const cl_mem, const size_t, const size_t,
+ cl_mem, const size_t, const size_t,
+ cl_command_queue*, cl_event*);
+
+// =================================================================================================
// Clears the cache of stored binaries
StatusCode ClearCache() { return cache::ClearCache(); }