summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Cedric Nugteren <web@cedricnugteren.nl> 2016-02-20 14:15:41 +0100
committer Cedric Nugteren <web@cedricnugteren.nl> 2016-02-20 14:15:41 +0100
commit 6dc44da07bc0209a399a3e40300aa859e41034d9 (patch)
tree 12ecdbb35e98f92919c42ad5fc4068a0cf66e0e8 /src
parent 8854a731276b3f32c9e381a228733de7c6d95760 (diff)
Added support for xGERU and xGERC routines
Diffstat (limited to 'src')
-rw-r--r-- src/clblast.cc 54
-rw-r--r-- src/kernels/level2/xger.opencl 6
-rw-r--r-- src/routines/level1/xdotu.cc 1
-rw-r--r-- src/routines/level2/xger.cc 4
-rw-r--r-- src/routines/level2/xgerc.cc 53
-rw-r--r-- src/routines/level2/xgeru.cc 52
6 files changed, 153 insertions, 17 deletions
diff --git a/src/clblast.cc b/src/clblast.cc
index aed3f141..47f2c59d 100644
--- a/src/clblast.cc
+++ b/src/clblast.cc
@@ -39,6 +39,8 @@
#include "internal/routines/level2/xtbmv.h"
#include "internal/routines/level2/xtpmv.h"
#include "internal/routines/level2/xger.h"
+#include "internal/routines/level2/xgeru.h"
+#include "internal/routines/level2/xgerc.h"
// BLAS level-3 includes
#include "internal/routines/level3/xgemm.h"
@@ -872,14 +874,24 @@ template StatusCode Ger<double>(const Layout,
// General rank-1 complex matrix update: CGERU/ZGERU
template <typename T>
-StatusCode Geru(const Layout,
- const size_t, const size_t,
- const T,
- const cl_mem, const size_t, const size_t,
- const cl_mem, const size_t, const size_t,
- cl_mem, const size_t, const size_t,
- cl_command_queue*, cl_event*) {
- return StatusCode::kNotImplemented;
+StatusCode Geru(const Layout layout,
+ const size_t m, const size_t n,
+ const T alpha,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ cl_command_queue* queue, cl_event* event) {
+ auto queue_cpp = Queue(*queue);
+ auto event_cpp = Event(*event);
+ auto routine = Xgeru<T>(queue_cpp, event_cpp);
+ auto status = routine.SetUp();
+ if (status != StatusCode::kSuccess) { return status; }
+ return routine.DoGeru(layout,
+ m, n,
+ alpha,
+ Buffer<T>(x_buffer), x_offset, x_inc,
+ Buffer<T>(y_buffer), y_offset, y_inc,
+ Buffer<T>(a_buffer), a_offset, a_ld);
}
template StatusCode Geru<float2>(const Layout,
const size_t, const size_t,
@@ -898,14 +910,24 @@ template StatusCode Geru<double2>(const Layout,
// General rank-1 complex conjugated matrix update: CGERC/ZGERC
template <typename T>
-StatusCode Gerc(const Layout,
- const size_t, const size_t,
- const T,
- const cl_mem, const size_t, const size_t,
- const cl_mem, const size_t, const size_t,
- cl_mem, const size_t, const size_t,
- cl_command_queue*, cl_event*) {
- return StatusCode::kNotImplemented;
+StatusCode Gerc(const Layout layout,
+ const size_t m, const size_t n,
+ const T alpha,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ cl_command_queue* queue, cl_event* event) {
+ auto queue_cpp = Queue(*queue);
+ auto event_cpp = Event(*event);
+ auto routine = Xgerc<T>(queue_cpp, event_cpp);
+ auto status = routine.SetUp();
+ if (status != StatusCode::kSuccess) { return status; }
+ return routine.DoGerc(layout,
+ m, n,
+ alpha,
+ Buffer<T>(x_buffer), x_offset, x_inc,
+ Buffer<T>(y_buffer), y_offset, y_inc,
+ Buffer<T>(a_buffer), a_offset, a_ld);
}
template StatusCode Gerc<float2>(const Layout,
const size_t, const size_t,
diff --git a/src/kernels/level2/xger.opencl b/src/kernels/level2/xger.opencl
index aa765b6c..c1cddea1 100644
--- a/src/kernels/level2/xger.opencl
+++ b/src/kernels/level2/xger.opencl
@@ -62,6 +62,9 @@ __kernel void Xger(const int max_one, const int max_two, const real alpha,
const int id1 = w*get_global_size(0) + get_global_id(0);
if (id1 < max_one) {
yvalues[w] = ygm[id1*y_inc + y_offset];
+ #if defined(ROUTINE_GERC)
+ COMPLEX_CONJUGATE(yvalues[w]);
+ #endif
}
}
@@ -110,6 +113,9 @@ __kernel void Xger(const int max_one, const int max_two, const real alpha,
const int id2 = w*get_global_size(1) + get_global_id(1);
if (id2 < max_two) {
yvalues[w] = ygm[id2*y_inc + y_offset];
+ #if defined(ROUTINE_GERC)
+ COMPLEX_CONJUGATE(yvalues[w]);
+ #endif
}
}
diff --git a/src/routines/level1/xdotu.cc b/src/routines/level1/xdotu.cc
index 0b1bd2a8..28d9b730 100644
--- a/src/routines/level1/xdotu.cc
+++ b/src/routines/level1/xdotu.cc
@@ -14,7 +14,6 @@
#include "internal/routines/level1/xdotu.h"
#include <string>
-#include <vector>
namespace clblast {
// =================================================================================================
diff --git a/src/routines/level2/xger.cc b/src/routines/level2/xger.cc
index c3a24264..55fa26d4 100644
--- a/src/routines/level2/xger.cc
+++ b/src/routines/level2/xger.cc
@@ -22,6 +22,8 @@ namespace clblast {
// Specific implementations to get the memory-type based on a template argument
template <> const Precision Xger<float>::precision_ = Precision::kSingle;
template <> const Precision Xger<double>::precision_ = Precision::kDouble;
+template <> const Precision Xger<float2>::precision_ = Precision::kComplexSingle;
+template <> const Precision Xger<double2>::precision_ = Precision::kComplexDouble;
// =================================================================================================
@@ -102,6 +104,8 @@ StatusCode Xger<T>::DoGer(const Layout layout,
// Compiles the templated class
template class Xger<float>;
template class Xger<double>;
+template class Xger<float2>;
+template class Xger<double2>;
// =================================================================================================
} // namespace clblast
diff --git a/src/routines/level2/xgerc.cc b/src/routines/level2/xgerc.cc
new file mode 100644
index 00000000..09408898
--- /dev/null
+++ b/src/routines/level2/xgerc.cc
@@ -0,0 +1,53 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements the Xgerc class (see the header for information about the class).
+//
+// =================================================================================================
+
+#include "internal/routines/level2/xgerc.h"
+
+#include <string>
+
+namespace clblast {
+// =================================================================================================
+
+// Constructor: passes the queue, event and routine name straight on to the Xger<T> constructor
+template <typename T>
+Xgerc<T>::Xgerc(Queue &queue, Event &event, const std::string &name):
+ Xger<T>(queue, event, name) {
+}
+
+// =================================================================================================
+
+// The main routine: forwards every argument unchanged to DoGer and returns its status code
+template <typename T>
+StatusCode Xgerc<T>::DoGerc(const Layout layout,
+ const size_t m, const size_t n,
+ const T alpha,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld) {
+
+ // Identical to the regular Ger operation on complex data from the host's point of view: the
+ // conjugation of y is done inside the kernel, in code enabled by the ROUTINE_GERC guard.
+ return DoGer(layout, m, n, alpha,
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ a_buffer, a_offset, a_ld);
+}
+
+// =================================================================================================
+
+// Compiles the templated class for the two complex data-types only (float2 and double2)
+template class Xgerc<float2>;
+template class Xgerc<double2>;
+
+// =================================================================================================
+} // namespace clblast
diff --git a/src/routines/level2/xgeru.cc b/src/routines/level2/xgeru.cc
new file mode 100644
index 00000000..36fd9d0a
--- /dev/null
+++ b/src/routines/level2/xgeru.cc
@@ -0,0 +1,52 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements the Xgeru class (see the header for information about the class).
+//
+// =================================================================================================
+
+#include "internal/routines/level2/xgeru.h"
+
+#include <string>
+
+namespace clblast {
+// =================================================================================================
+
+// Constructor: passes the queue, event and routine name straight on to the Xger<T> constructor
+template <typename T>
+Xgeru<T>::Xgeru(Queue &queue, Event &event, const std::string &name):
+ Xger<T>(queue, event, name) {
+}
+
+// =================================================================================================
+
+// The main routine: forwards every argument unchanged to DoGer and returns its status code
+template <typename T>
+StatusCode Xgeru<T>::DoGeru(const Layout layout,
+ const size_t m, const size_t n,
+ const T alpha,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld) {
+
+ // Unconjugated update: the regular Ger operation, which this class instantiates for complex data
+ return DoGer(layout, m, n, alpha,
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ a_buffer, a_offset, a_ld);
+}
+
+// =================================================================================================
+
+// Compiles the templated class for the two complex data-types only (float2 and double2)
+template class Xgeru<float2>;
+template class Xgeru<double2>;
+
+// =================================================================================================
+} // namespace clblast