summary | refs | log | tree | commit | diff
path: root/include
diff options
context:
space:
mode:
author Cedric Nugteren <web@cedricnugteren.nl> 2016-06-18 18:16:14 +0200
committer Cedric Nugteren <web@cedricnugteren.nl> 2016-06-18 18:16:14 +0200
commit bacb5d2bb2ea7b141034878090aca850db8f9d00 (patch)
tree 7315f72f18c93fa02302e58e2718d2fbfd9db361 /include
parent 7b4c0e1cf03a94077c20f7f12ef15fb8717c74ca (diff)
Clean-up of the routine class, moved RunKernel to the routine/common file
Diffstat (limited to 'include')
-rw-r--r-- include/internal/routine.h 19
-rw-r--r-- include/internal/routines/common.h 24
2 files changed, 21 insertions, 22 deletions
diff --git a/include/internal/routine.h b/include/internal/routine.h
index 9db4e227..a6a59d77 100644
--- a/include/internal/routine.h
+++ b/include/internal/routine.h
@@ -23,6 +23,7 @@
#include "internal/utilities.h"
#include "internal/database.h"
#include "internal/buffer_test.h"
+#include "internal/routines/common.h"
namespace clblast {
// =================================================================================================
@@ -40,8 +41,7 @@ class Routine {
protected:
- // Non-static variable for the precision. Note that the same variable (but static) might exist in
- // a derived class.
+ // Non-static variable for the precision
const Precision precision_;
// The routine's name and its kernel-source in string form
@@ -62,22 +62,7 @@ class Routine {
};
// =================================================================================================
-
-// Enqueues a kernel, waits for completion, and checks for errors
-StatusCode RunKernel(Kernel &kernel, Queue queue, const Device device,
- std::vector<size_t> global, const std::vector<size_t> &local,
- EventPointer event, std::vector<Event>& waitForEvents);
-
-// As above, but without an event waiting list
-StatusCode RunKernel(Kernel &kernel, Queue queue, const Device device,
- std::vector<size_t> global, const std::vector<size_t> &local,
- EventPointer event);
-
-// =================================================================================================
} // namespace clblast
-// Temporary fix: TODO place include in a more logical place
-#include "internal/routines/common.h"
-
// CLBLAST_ROUTINE_H_
#endif
diff --git a/include/internal/routines/common.h b/include/internal/routines/common.h
index 95fbde46..308785bd 100644
--- a/include/internal/routines/common.h
+++ b/include/internal/routines/common.h
@@ -8,7 +8,8 @@
// Cedric Nugteren <www.cedricnugteren.nl>
//
// This file contains all the interfaces to common kernels, such as copying, padding, and
-// transposing a matrix. These functions are templated and thus header-only.
+// transposing a matrix. These functions are templated and thus header-only. This file also contains
+// other common functions to routines, such as a function to launch a kernel.
//
// =================================================================================================
@@ -18,17 +19,30 @@
#include <string>
#include <vector>
-#include "internal/utilities.h"
-#include "internal/routine.h"
+#include "clblast.h"
+#include "internal/clpp11.h"
+#include "internal/database.h"
namespace clblast {
// =================================================================================================
+// Enqueues a kernel, waits for completion, and checks for errors
+StatusCode RunKernel(Kernel &kernel, Queue &queue, const Device &device,
+ std::vector<size_t> global, const std::vector<size_t> &local,
+ EventPointer event, std::vector<Event>& waitForEvents);
+
+// As above, but without an event waiting list
+StatusCode RunKernel(Kernel &kernel, Queue &queue, const Device &device,
+ std::vector<size_t> global, const std::vector<size_t> &local,
+ EventPointer event);
+
+// =================================================================================================
+
// Copies or transposes a matrix and optionally pads/unpads it with zeros. This method is also able
// to write to symmetric and triangular matrices through optional arguments.
template <typename T>
-StatusCode PadCopyTransposeMatrix(Queue queue, const Device device, const Context context,
- const Database db,
+StatusCode PadCopyTransposeMatrix(Queue &queue, const Device &device, const Context &context,
+ const Database &db,
EventPointer event, std::vector<Event>& waitForEvents,
const size_t src_one, const size_t src_two,
const size_t src_ld, const size_t src_offset,