summary | refs | log | tree | commit | diff
path: root/include
diff options
context:
space:
mode:
Diffstat (limited to 'include')
-rw-r--r-- include/internal/clpp11.h 55
-rw-r--r-- include/internal/routine.h 14
-rw-r--r-- include/internal/routines/level1/xaxpy.h 3
-rw-r--r-- include/internal/routines/level1/xcopy.h 3
-rw-r--r-- include/internal/routines/level1/xdot.h 3
-rw-r--r-- include/internal/routines/level1/xdotc.h 2
-rw-r--r-- include/internal/routines/level1/xdotu.h 2
-rw-r--r-- include/internal/routines/level1/xnrm2.h 3
-rw-r--r-- include/internal/routines/level1/xscal.h 3
-rw-r--r-- include/internal/routines/level1/xswap.h 3
-rw-r--r-- include/internal/routines/level2/xgbmv.h 2
-rw-r--r-- include/internal/routines/level2/xgemv.h 3
-rw-r--r-- include/internal/routines/level2/xger.h 3
-rw-r--r-- include/internal/routines/level2/xgerc.h 2
-rw-r--r-- include/internal/routines/level2/xgeru.h 2
-rw-r--r-- include/internal/routines/level2/xhbmv.h 2
-rw-r--r-- include/internal/routines/level2/xhemv.h 2
-rw-r--r-- include/internal/routines/level2/xher.h 3
-rw-r--r-- include/internal/routines/level2/xher2.h 3
-rw-r--r-- include/internal/routines/level2/xhpmv.h 2
-rw-r--r-- include/internal/routines/level2/xhpr.h 2
-rw-r--r-- include/internal/routines/level2/xhpr2.h 2
-rw-r--r-- include/internal/routines/level2/xsbmv.h 2
-rw-r--r-- include/internal/routines/level2/xspmv.h 2
-rw-r--r-- include/internal/routines/level2/xspr.h 2
-rw-r--r-- include/internal/routines/level2/xspr2.h 2
-rw-r--r-- include/internal/routines/level2/xsymv.h 2
-rw-r--r-- include/internal/routines/level2/xsyr.h 2
-rw-r--r-- include/internal/routines/level2/xsyr2.h 2
-rw-r--r-- include/internal/routines/level2/xtbmv.h 2
-rw-r--r-- include/internal/routines/level2/xtpmv.h 2
-rw-r--r-- include/internal/routines/level2/xtrmv.h 2
-rw-r--r-- include/internal/routines/level3/xgemm.h 3
-rw-r--r-- include/internal/routines/level3/xhemm.h 2
-rw-r--r-- include/internal/routines/level3/xher2k.h 3
-rw-r--r-- include/internal/routines/level3/xherk.h 3
-rw-r--r-- include/internal/routines/level3/xsymm.h 2
-rw-r--r-- include/internal/routines/level3/xsyr2k.h 3
-rw-r--r-- include/internal/routines/level3/xsyrk.h 3
-rw-r--r-- include/internal/routines/level3/xtrmm.h 2
40 files changed, 106 insertions, 54 deletions
diff --git a/include/internal/clpp11.h b/include/internal/clpp11.h
index 00905ef7..543d423a 100644
--- a/include/internal/clpp11.h
+++ b/include/internal/clpp11.h
@@ -73,29 +73,41 @@ class Event {
public:
// Constructor based on the regular OpenCL data-type
- explicit Event(cl_event* event): event_(event) { }
+ explicit Event(const cl_event event): event_(event) { }
+
+ // Regular constructor
+ explicit Event(): event_(nullptr) { }
+
+ // Waits for completion of this event
+ void WaitForCompletion() const {
+ CheckError(clWaitForEvents(1, &event_));
+ }
// Retrieves the elapsed time of the last recorded event. Note that no error checking is done on
// the 'clGetEventProfilingInfo' function, since there is a bug in Apple's OpenCL implementation:
// http://stackoverflow.com/questions/26145603/clgeteventprofilinginfo-bug-in-macosx
float GetElapsedTime() const {
- CheckError(clWaitForEvents(1, event_));
+ WaitForCompletion();
auto bytes = size_t{0};
- clGetEventProfilingInfo(*event_, CL_PROFILING_COMMAND_START, 0, nullptr, &bytes);
+ clGetEventProfilingInfo(event_, CL_PROFILING_COMMAND_START, 0, nullptr, &bytes);
auto time_start = size_t{0};
- clGetEventProfilingInfo(*event_, CL_PROFILING_COMMAND_START, bytes, &time_start, nullptr);
- clGetEventProfilingInfo(*event_, CL_PROFILING_COMMAND_END, 0, nullptr, &bytes);
+ clGetEventProfilingInfo(event_, CL_PROFILING_COMMAND_START, bytes, &time_start, nullptr);
+ clGetEventProfilingInfo(event_, CL_PROFILING_COMMAND_END, 0, nullptr, &bytes);
auto time_end = size_t{0};
- clGetEventProfilingInfo(*event_, CL_PROFILING_COMMAND_END, bytes, &time_end, nullptr);
+ clGetEventProfilingInfo(event_, CL_PROFILING_COMMAND_END, bytes, &time_end, nullptr);
return (time_end - time_start) * 1.0e-6f;
}
// Accessor to the private data-member
- cl_event& operator()() { return *event_; }
+ cl_event& operator()() { return event_; }
+ cl_event* pointer() { return &event_; }
private:
- cl_event* event_;
+ cl_event event_;
};
+// Pointer to an OpenCL event
+using EventPointer = cl_event*;
+
// =================================================================================================
// C++11 version of 'cl_platform_id'
@@ -600,17 +612,36 @@ class Kernel {
// Launches a kernel onto the specified queue
void Launch(const Queue &queue, const std::vector<size_t> &global,
- const std::vector<size_t> &local, Event &event) {
+ const std::vector<size_t> &local, EventPointer event) {
+ CheckError(clEnqueueNDRangeKernel(queue(), *kernel_, static_cast<cl_uint>(global.size()),
+ nullptr, global.data(), local.data(),
+ 0, nullptr, event));
+ }
+
+ // As above, but with an event waiting list
+ void Launch(const Queue &queue, const std::vector<size_t> &global,
+ const std::vector<size_t> &local, EventPointer event,
+ std::vector<Event>& waitForEvents) {
+ if (waitForEvents.size() == 0) { return Launch(queue, global, local, event); }
+
+ // Builds a plain version of the events waiting list
+ auto waitForEventsPlain = std::vector<cl_event>();
+ for (auto &waitEvent : waitForEvents) {
+ waitForEventsPlain.push_back(waitEvent());
+ }
+
+ // Launches the kernel while waiting for other events
CheckError(clEnqueueNDRangeKernel(queue(), *kernel_, static_cast<cl_uint>(global.size()),
nullptr, global.data(), local.data(),
- 0, nullptr, &(event())));
+ waitForEventsPlain.size(), waitForEventsPlain.data(),
+ event));
}
// As above, but with the default local workgroup size
- void Launch(const Queue &queue, const std::vector<size_t> &global, Event &event) {
+ void Launch(const Queue &queue, const std::vector<size_t> &global, EventPointer event) {
CheckError(clEnqueueNDRangeKernel(queue(), *kernel_, static_cast<cl_uint>(global.size()),
nullptr, global.data(), nullptr,
- 0, nullptr, &(event())));
+ 0, nullptr, event));
}
// Accessor to the private data-member
diff --git a/include/internal/routine.h b/include/internal/routine.h
index 5f5b8211..b2b6f622 100644
--- a/include/internal/routine.h
+++ b/include/internal/routine.h
@@ -55,7 +55,7 @@ class Routine {
static constexpr bool ErrorIn(const StatusCode s) { return (s != StatusCode::kSuccess); }
// Base class constructor
- explicit Routine(Queue &queue, Event &event, const std::string &name,
+ explicit Routine(Queue &queue, EventPointer event, const std::string &name,
const std::vector<std::string> &routines, const Precision precision);
// Set-up phase of the kernel
@@ -65,7 +65,12 @@ class Routine {
// Runs a kernel given the global and local thread sizes
StatusCode RunKernel(Kernel &kernel, std::vector<size_t> &global,
- const std::vector<size_t> &local);
+ const std::vector<size_t> &local, EventPointer event,
+ std::vector<Event>& waitForEvents);
+
+ // As above, but without an event waiting list
+ StatusCode RunKernel(Kernel &kernel, std::vector<size_t> &global,
+ const std::vector<size_t> &local, EventPointer event);
// Tests for valid inputs of matrices A, B, and C
StatusCode TestMatrixA(const size_t one, const size_t two, const Buffer<T> &buffer,
@@ -87,7 +92,8 @@ class Routine {
// Copies/transposes a matrix and padds/unpads it with zeroes. This method is also able to write
// to symmetric and triangular matrices through optional arguments.
- StatusCode PadCopyTransposeMatrix(const size_t src_one, const size_t src_two,
+ StatusCode PadCopyTransposeMatrix(EventPointer event, std::vector<Event>& waitForEvents,
+ const size_t src_one, const size_t src_two,
const size_t src_ld, const size_t src_offset,
const Buffer<T> &src,
const size_t dest_one, const size_t dest_two,
@@ -114,7 +120,7 @@ class Routine {
// The OpenCL objects, accessible only from derived classes
Queue queue_;
- Event event_;
+ EventPointer event_;
const Context context_;
const Device device_;
diff --git a/include/internal/routines/level1/xaxpy.h b/include/internal/routines/level1/xaxpy.h
index 689cf169..bc00c8e3 100644
--- a/include/internal/routines/level1/xaxpy.h
+++ b/include/internal/routines/level1/xaxpy.h
@@ -28,6 +28,7 @@ class Xaxpy: public Routine<T> {
using Routine<T>::db_;
using Routine<T>::source_string_;
using Routine<T>::queue_;
+ using Routine<T>::event_;
using Routine<T>::GetProgramFromCache;
using Routine<T>::TestVectorX;
using Routine<T>::TestVectorY;
@@ -35,7 +36,7 @@ class Xaxpy: public Routine<T> {
using Routine<T>::ErrorIn;
// Constructor
- Xaxpy(Queue &queue, Event &event, const std::string &name = "AXPY");
+ Xaxpy(Queue &queue, EventPointer event, const std::string &name = "AXPY");
// Templated-precision implementation of the routine
StatusCode DoAxpy(const size_t n, const T alpha,
diff --git a/include/internal/routines/level1/xcopy.h b/include/internal/routines/level1/xcopy.h
index 15f339aa..5786cb0f 100644
--- a/include/internal/routines/level1/xcopy.h
+++ b/include/internal/routines/level1/xcopy.h
@@ -28,6 +28,7 @@ class Xcopy: public Routine<T> {
using Routine<T>::db_;
using Routine<T>::source_string_;
using Routine<T>::queue_;
+ using Routine<T>::event_;
using Routine<T>::GetProgramFromCache;
using Routine<T>::TestVectorX;
using Routine<T>::TestVectorY;
@@ -35,7 +36,7 @@ class Xcopy: public Routine<T> {
using Routine<T>::ErrorIn;
// Constructor
- Xcopy(Queue &queue, Event &event, const std::string &name = "COPY");
+ Xcopy(Queue &queue, EventPointer event, const std::string &name = "COPY");
// Templated-precision implementation of the routine
StatusCode DoCopy(const size_t n,
diff --git a/include/internal/routines/level1/xdot.h b/include/internal/routines/level1/xdot.h
index 64b62945..95a7ad07 100644
--- a/include/internal/routines/level1/xdot.h
+++ b/include/internal/routines/level1/xdot.h
@@ -28,6 +28,7 @@ class Xdot: public Routine<T> {
using Routine<T>::db_;
using Routine<T>::source_string_;
using Routine<T>::queue_;
+ using Routine<T>::event_;
using Routine<T>::context_;
using Routine<T>::GetProgramFromCache;
using Routine<T>::TestVectorX;
@@ -37,7 +38,7 @@ class Xdot: public Routine<T> {
using Routine<T>::ErrorIn;
// Constructor
- Xdot(Queue &queue, Event &event, const std::string &name = "DOT");
+ Xdot(Queue &queue, EventPointer event, const std::string &name = "DOT");
// Templated-precision implementation of the routine
StatusCode DoDot(const size_t n,
diff --git a/include/internal/routines/level1/xdotc.h b/include/internal/routines/level1/xdotc.h
index 726cec7c..0dc2cfe9 100644
--- a/include/internal/routines/level1/xdotc.h
+++ b/include/internal/routines/level1/xdotc.h
@@ -28,7 +28,7 @@ class Xdotc: public Xdot<T> {
using Xdot<T>::DoDot;
// Constructor
- Xdotc(Queue &queue, Event &event, const std::string &name = "DOTC");
+ Xdotc(Queue &queue, EventPointer event, const std::string &name = "DOTC");
// Templated-precision implementation of the routine
StatusCode DoDotc(const size_t n,
diff --git a/include/internal/routines/level1/xdotu.h b/include/internal/routines/level1/xdotu.h
index 825ebb78..98988744 100644
--- a/include/internal/routines/level1/xdotu.h
+++ b/include/internal/routines/level1/xdotu.h
@@ -28,7 +28,7 @@ class Xdotu: public Xdot<T> {
using Xdot<T>::DoDot;
// Constructor
- Xdotu(Queue &queue, Event &event, const std::string &name = "DOTU");
+ Xdotu(Queue &queue, EventPointer event, const std::string &name = "DOTU");
// Templated-precision implementation of the routine
StatusCode DoDotu(const size_t n,
diff --git a/include/internal/routines/level1/xnrm2.h b/include/internal/routines/level1/xnrm2.h
index b3fffef6..6f6ca74f 100644
--- a/include/internal/routines/level1/xnrm2.h
+++ b/include/internal/routines/level1/xnrm2.h
@@ -28,6 +28,7 @@ class Xnrm2: public Routine<T> {
using Routine<T>::db_;
using Routine<T>::source_string_;
using Routine<T>::queue_;
+ using Routine<T>::event_;
using Routine<T>::context_;
using Routine<T>::GetProgramFromCache;
using Routine<T>::TestVectorX;
@@ -36,7 +37,7 @@ class Xnrm2: public Routine<T> {
using Routine<T>::ErrorIn;
// Constructor
- Xnrm2(Queue &queue, Event &event, const std::string &name = "NRM2");
+ Xnrm2(Queue &queue, EventPointer event, const std::string &name = "NRM2");
// Templated-precision implementation of the routine
StatusCode DoNrm2(const size_t n,
diff --git a/include/internal/routines/level1/xscal.h b/include/internal/routines/level1/xscal.h
index d97b5a07..e10a201d 100644
--- a/include/internal/routines/level1/xscal.h
+++ b/include/internal/routines/level1/xscal.h
@@ -28,13 +28,14 @@ class Xscal: public Routine<T> {
using Routine<T>::db_;
using Routine<T>::source_string_;
using Routine<T>::queue_;
+ using Routine<T>::event_;
using Routine<T>::GetProgramFromCache;
using Routine<T>::TestVectorX;
using Routine<T>::RunKernel;
using Routine<T>::ErrorIn;
// Constructor
- Xscal(Queue &queue, Event &event, const std::string &name = "SCAL");
+ Xscal(Queue &queue, EventPointer event, const std::string &name = "SCAL");
// Templated-precision implementation of the routine
StatusCode DoScal(const size_t n, const T alpha,
diff --git a/include/internal/routines/level1/xswap.h b/include/internal/routines/level1/xswap.h
index fe79882b..0f240763 100644
--- a/include/internal/routines/level1/xswap.h
+++ b/include/internal/routines/level1/xswap.h
@@ -28,6 +28,7 @@ class Xswap: public Routine<T> {
using Routine<T>::db_;
using Routine<T>::source_string_;
using Routine<T>::queue_;
+ using Routine<T>::event_;
using Routine<T>::GetProgramFromCache;
using Routine<T>::TestVectorX;
using Routine<T>::TestVectorY;
@@ -35,7 +36,7 @@ class Xswap: public Routine<T> {
using Routine<T>::ErrorIn;
// Constructor
- Xswap(Queue &queue, Event &event, const std::string &name = "SWAP");
+ Xswap(Queue &queue, EventPointer event, const std::string &name = "SWAP");
// Templated-precision implementation of the routine
StatusCode DoSwap(const size_t n,
diff --git a/include/internal/routines/level2/xgbmv.h b/include/internal/routines/level2/xgbmv.h
index 27b033e9..bc94c77d 100644
--- a/include/internal/routines/level2/xgbmv.h
+++ b/include/internal/routines/level2/xgbmv.h
@@ -30,7 +30,7 @@ class Xgbmv: public Xgemv<T> {
using Xgemv<T>::MatVec;
// Constructor
- Xgbmv(Queue &queue, Event &event, const std::string &name = "GBMV");
+ Xgbmv(Queue &queue, EventPointer event, const std::string &name = "GBMV");
// Templated-precision implementation of the routine
StatusCode DoGbmv(const Layout layout, const Transpose a_transpose,
diff --git a/include/internal/routines/level2/xgemv.h b/include/internal/routines/level2/xgemv.h
index b31565ec..0b2a8e66 100644
--- a/include/internal/routines/level2/xgemv.h
+++ b/include/internal/routines/level2/xgemv.h
@@ -28,6 +28,7 @@ class Xgemv: public Routine<T> {
using Routine<T>::db_;
using Routine<T>::source_string_;
using Routine<T>::queue_;
+ using Routine<T>::event_;
using Routine<T>::GetProgramFromCache;
using Routine<T>::TestVectorX;
using Routine<T>::TestVectorY;
@@ -37,7 +38,7 @@ class Xgemv: public Routine<T> {
using Routine<T>::ErrorIn;
// Constructor
- Xgemv(Queue &queue, Event &event, const std::string &name = "GEMV");
+ Xgemv(Queue &queue, EventPointer event, const std::string &name = "GEMV");
// Templated-precision implementation of the routine
StatusCode DoGemv(const Layout layout, const Transpose a_transpose,
diff --git a/include/internal/routines/level2/xger.h b/include/internal/routines/level2/xger.h
index 45ecea10..5ace9da6 100644
--- a/include/internal/routines/level2/xger.h
+++ b/include/internal/routines/level2/xger.h
@@ -28,6 +28,7 @@ class Xger: public Routine<T> {
using Routine<T>::db_;
using Routine<T>::source_string_;
using Routine<T>::queue_;
+ using Routine<T>::event_;
using Routine<T>::GetProgramFromCache;
using Routine<T>::TestVectorX;
using Routine<T>::TestVectorY;
@@ -36,7 +37,7 @@ class Xger: public Routine<T> {
using Routine<T>::ErrorIn;
// Constructor
- Xger(Queue &queue, Event &event, const std::string &name = "GER");
+ Xger(Queue &queue, EventPointer event, const std::string &name = "GER");
// Templated-precision implementation of the routine
StatusCode DoGer(const Layout layout,
diff --git a/include/internal/routines/level2/xgerc.h b/include/internal/routines/level2/xgerc.h
index 8e515a14..6d06ef94 100644
--- a/include/internal/routines/level2/xgerc.h
+++ b/include/internal/routines/level2/xgerc.h
@@ -28,7 +28,7 @@ class Xgerc: public Xger<T> {
using Xger<T>::DoGer;
// Constructor
- Xgerc(Queue &queue, Event &event, const std::string &name = "GERC");
+ Xgerc(Queue &queue, EventPointer event, const std::string &name = "GERC");
// Templated-precision implementation of the routine
StatusCode DoGerc(const Layout layout,
diff --git a/include/internal/routines/level2/xgeru.h b/include/internal/routines/level2/xgeru.h
index ec485c37..45ce1cba 100644
--- a/include/internal/routines/level2/xgeru.h
+++ b/include/internal/routines/level2/xgeru.h
@@ -28,7 +28,7 @@ class Xgeru: public Xger<T> {
using Xger<T>::DoGer;
// Constructor
- Xgeru(Queue &queue, Event &event, const std::string &name = "GERU");
+ Xgeru(Queue &queue, EventPointer event, const std::string &name = "GERU");
// Templated-precision implementation of the routine
StatusCode DoGeru(const Layout layout,
diff --git a/include/internal/routines/level2/xhbmv.h b/include/internal/routines/level2/xhbmv.h
index 65138424..f0a6212c 100644
--- a/include/internal/routines/level2/xhbmv.h
+++ b/include/internal/routines/level2/xhbmv.h
@@ -30,7 +30,7 @@ class Xhbmv: public Xgemv<T> {
using Xgemv<T>::MatVec;
// Constructor
- Xhbmv(Queue &queue, Event &event, const std::string &name = "HBMV");
+ Xhbmv(Queue &queue, EventPointer event, const std::string &name = "HBMV");
// Templated-precision implementation of the routine
StatusCode DoHbmv(const Layout layout, const Triangle triangle,
diff --git a/include/internal/routines/level2/xhemv.h b/include/internal/routines/level2/xhemv.h
index b74db760..3daf2457 100644
--- a/include/internal/routines/level2/xhemv.h
+++ b/include/internal/routines/level2/xhemv.h
@@ -30,7 +30,7 @@ class Xhemv: public Xgemv<T> {
using Xgemv<T>::MatVec;
// Constructor
- Xhemv(Queue &queue, Event &event, const std::string &name = "HEMV");
+ Xhemv(Queue &queue, EventPointer event, const std::string &name = "HEMV");
// Templated-precision implementation of the routine
StatusCode DoHemv(const Layout layout, const Triangle triangle,
diff --git a/include/internal/routines/level2/xher.h b/include/internal/routines/level2/xher.h
index 6322265b..861ba302 100644
--- a/include/internal/routines/level2/xher.h
+++ b/include/internal/routines/level2/xher.h
@@ -28,6 +28,7 @@ class Xher: public Routine<T> {
using Routine<T>::db_;
using Routine<T>::source_string_;
using Routine<T>::queue_;
+ using Routine<T>::event_;
using Routine<T>::GetProgramFromCache;
using Routine<T>::TestVectorX;
using Routine<T>::TestMatrixA;
@@ -36,7 +37,7 @@ class Xher: public Routine<T> {
using Routine<T>::ErrorIn;
// Constructor
- Xher(Queue &queue, Event &event, const std::string &name = "HER");
+ Xher(Queue &queue, EventPointer event, const std::string &name = "HER");
// Translates alpha of type 'U' into type 'T'
T GetAlpha(const U alpha);
diff --git a/include/internal/routines/level2/xher2.h b/include/internal/routines/level2/xher2.h
index 26f69046..9a23199e 100644
--- a/include/internal/routines/level2/xher2.h
+++ b/include/internal/routines/level2/xher2.h
@@ -28,6 +28,7 @@ class Xher2: public Routine<T> {
using Routine<T>::db_;
using Routine<T>::source_string_;
using Routine<T>::queue_;
+ using Routine<T>::event_;
using Routine<T>::GetProgramFromCache;
using Routine<T>::TestVectorX;
using Routine<T>::TestVectorY;
@@ -37,7 +38,7 @@ class Xher2: public Routine<T> {
using Routine<T>::ErrorIn;
// Constructor
- Xher2(Queue &queue, Event &event, const std::string &name = "HER2");
+ Xher2(Queue &queue, EventPointer event, const std::string &name = "HER2");
// Templated-precision implementation of the routine
StatusCode DoHer2(const Layout layout, const Triangle triangle,
diff --git a/include/internal/routines/level2/xhpmv.h b/include/internal/routines/level2/xhpmv.h
index 48f1ed3f..a1d5595a 100644
--- a/include/internal/routines/level2/xhpmv.h
+++ b/include/internal/routines/level2/xhpmv.h
@@ -30,7 +30,7 @@ class Xhpmv: public Xgemv<T> {
using Xgemv<T>::MatVec;
// Constructor
- Xhpmv(Queue &queue, Event &event, const std::string &name = "HPMV");
+ Xhpmv(Queue &queue, EventPointer event, const std::string &name = "HPMV");
// Templated-precision implementation of the routine
StatusCode DoHpmv(const Layout layout, const Triangle triangle,
diff --git a/include/internal/routines/level2/xhpr.h b/include/internal/routines/level2/xhpr.h
index a0c3cb92..6554d74c 100644
--- a/include/internal/routines/level2/xhpr.h
+++ b/include/internal/routines/level2/xhpr.h
@@ -28,7 +28,7 @@ class Xhpr: public Xher<T,U> {
using Xher<T,U>::DoHer;
// Constructor
- Xhpr(Queue &queue, Event &event, const std::string &name = "HPR");
+ Xhpr(Queue &queue, EventPointer event, const std::string &name = "HPR");
// Templated-precision implementation of the routine
StatusCode DoHpr(const Layout layout, const Triangle triangle,
diff --git a/include/internal/routines/level2/xhpr2.h b/include/internal/routines/level2/xhpr2.h
index fd243d33..d95e7b61 100644
--- a/include/internal/routines/level2/xhpr2.h
+++ b/include/internal/routines/level2/xhpr2.h
@@ -28,7 +28,7 @@ class Xhpr2: public Xher2<T> {
using Xher2<T>::DoHer2;
// Constructor
- Xhpr2(Queue &queue, Event &event, const std::string &name = "HPR2");
+ Xhpr2(Queue &queue, EventPointer event, const std::string &name = "HPR2");
// Templated-precision implementation of the routine
StatusCode DoHpr2(const Layout layout, const Triangle triangle,
diff --git a/include/internal/routines/level2/xsbmv.h b/include/internal/routines/level2/xsbmv.h
index bb24d8f4..4328e377 100644
--- a/include/internal/routines/level2/xsbmv.h
+++ b/include/internal/routines/level2/xsbmv.h
@@ -30,7 +30,7 @@ class Xsbmv: public Xgemv<T> {
using Xgemv<T>::MatVec;
// Constructor
- Xsbmv(Queue &queue, Event &event, const std::string &name = "SBMV");
+ Xsbmv(Queue &queue, EventPointer event, const std::string &name = "SBMV");
// Templated-precision implementation of the routine
StatusCode DoSbmv(const Layout layout, const Triangle triangle,
diff --git a/include/internal/routines/level2/xspmv.h b/include/internal/routines/level2/xspmv.h
index 88f02a2f..ca3e28b6 100644
--- a/include/internal/routines/level2/xspmv.h
+++ b/include/internal/routines/level2/xspmv.h
@@ -30,7 +30,7 @@ class Xspmv: public Xgemv<T> {
using Xgemv<T>::MatVec;
// Constructor
- Xspmv(Queue &queue, Event &event, const std::string &name = "SPMV");
+ Xspmv(Queue &queue, EventPointer event, const std::string &name = "SPMV");
// Templated-precision implementation of the routine
StatusCode DoSpmv(const Layout layout, const Triangle triangle,
diff --git a/include/internal/routines/level2/xspr.h b/include/internal/routines/level2/xspr.h
index 5b01d2cb..7e91abc5 100644
--- a/include/internal/routines/level2/xspr.h
+++ b/include/internal/routines/level2/xspr.h
@@ -28,7 +28,7 @@ class Xspr: public Xher<T,T> {
using Xher<T,T>::DoHer;
// Constructor
- Xspr(Queue &queue, Event &event, const std::string &name = "SPR");
+ Xspr(Queue &queue, EventPointer event, const std::string &name = "SPR");
// Templated-precision implementation of the routine
StatusCode DoSpr(const Layout layout, const Triangle triangle,
diff --git a/include/internal/routines/level2/xspr2.h b/include/internal/routines/level2/xspr2.h
index 3d5f4992..a34be8e8 100644
--- a/include/internal/routines/level2/xspr2.h
+++ b/include/internal/routines/level2/xspr2.h
@@ -28,7 +28,7 @@ class Xspr2: public Xher2<T> {
using Xher2<T>::DoHer2;
// Constructor
- Xspr2(Queue &queue, Event &event, const std::string &name = "SPR2");
+ Xspr2(Queue &queue, EventPointer event, const std::string &name = "SPR2");
// Templated-precision implementation of the routine
StatusCode DoSpr2(const Layout layout, const Triangle triangle,
diff --git a/include/internal/routines/level2/xsymv.h b/include/internal/routines/level2/xsymv.h
index c7b92702..98a0ce88 100644
--- a/include/internal/routines/level2/xsymv.h
+++ b/include/internal/routines/level2/xsymv.h
@@ -30,7 +30,7 @@ class Xsymv: public Xgemv<T> {
using Xgemv<T>::MatVec;
// Constructor
- Xsymv(Queue &queue, Event &event, const std::string &name = "SYMV");
+ Xsymv(Queue &queue, EventPointer event, const std::string &name = "SYMV");
// Templated-precision implementation of the routine
StatusCode DoSymv(const Layout layout, const Triangle triangle,
diff --git a/include/internal/routines/level2/xsyr.h b/include/internal/routines/level2/xsyr.h
index 9704a881..f88498ae 100644
--- a/include/internal/routines/level2/xsyr.h
+++ b/include/internal/routines/level2/xsyr.h
@@ -28,7 +28,7 @@ class Xsyr: public Xher<T,T> {
using Xher<T,T>::DoHer;
// Constructor
- Xsyr(Queue &queue, Event &event, const std::string &name = "SYR");
+ Xsyr(Queue &queue, EventPointer event, const std::string &name = "SYR");
// Templated-precision implementation of the routine
StatusCode DoSyr(const Layout layout, const Triangle triangle,
diff --git a/include/internal/routines/level2/xsyr2.h b/include/internal/routines/level2/xsyr2.h
index f4dc9375..d2d3143a 100644
--- a/include/internal/routines/level2/xsyr2.h
+++ b/include/internal/routines/level2/xsyr2.h
@@ -28,7 +28,7 @@ class Xsyr2: public Xher2<T> {
using Xher2<T>::DoHer2;
// Constructor
- Xsyr2(Queue &queue, Event &event, const std::string &name = "SYR2");
+ Xsyr2(Queue &queue, EventPointer event, const std::string &name = "SYR2");
// Templated-precision implementation of the routine
StatusCode DoSyr2(const Layout layout, const Triangle triangle,
diff --git a/include/internal/routines/level2/xtbmv.h b/include/internal/routines/level2/xtbmv.h
index 89c90193..3b358080 100644
--- a/include/internal/routines/level2/xtbmv.h
+++ b/include/internal/routines/level2/xtbmv.h
@@ -34,7 +34,7 @@ class Xtbmv: public Xgemv<T> {
using Xgemv<T>::MatVec;
// Constructor
- Xtbmv(Queue &queue, Event &event, const std::string &name = "TBMV");
+ Xtbmv(Queue &queue, EventPointer event, const std::string &name = "TBMV");
// Templated-precision implementation of the routine
StatusCode DoTbmv(const Layout layout, const Triangle triangle,
diff --git a/include/internal/routines/level2/xtpmv.h b/include/internal/routines/level2/xtpmv.h
index 183d3505..f306cf4a 100644
--- a/include/internal/routines/level2/xtpmv.h
+++ b/include/internal/routines/level2/xtpmv.h
@@ -34,7 +34,7 @@ class Xtpmv: public Xgemv<T> {
using Xgemv<T>::MatVec;
// Constructor
- Xtpmv(Queue &queue, Event &event, const std::string &name = "TPMV");
+ Xtpmv(Queue &queue, EventPointer event, const std::string &name = "TPMV");
// Templated-precision implementation of the routine
StatusCode DoTpmv(const Layout layout, const Triangle triangle,
diff --git a/include/internal/routines/level2/xtrmv.h b/include/internal/routines/level2/xtrmv.h
index dadfbc98..cf0824a4 100644
--- a/include/internal/routines/level2/xtrmv.h
+++ b/include/internal/routines/level2/xtrmv.h
@@ -34,7 +34,7 @@ class Xtrmv: public Xgemv<T> {
using Xgemv<T>::MatVec;
// Constructor
- Xtrmv(Queue &queue, Event &event, const std::string &name = "TRMV");
+ Xtrmv(Queue &queue, EventPointer event, const std::string &name = "TRMV");
// Templated-precision implementation of the routine
StatusCode DoTrmv(const Layout layout, const Triangle triangle,
diff --git a/include/internal/routines/level3/xgemm.h b/include/internal/routines/level3/xgemm.h
index 9b40a7fc..85fb0616 100644
--- a/include/internal/routines/level3/xgemm.h
+++ b/include/internal/routines/level3/xgemm.h
@@ -28,6 +28,7 @@ class Xgemm: public Routine<T> {
using Routine<T>::db_;
using Routine<T>::source_string_;
using Routine<T>::queue_;
+ using Routine<T>::event_;
using Routine<T>::context_;
using Routine<T>::GetProgramFromCache;
using Routine<T>::PadCopyTransposeMatrix;
@@ -38,7 +39,7 @@ class Xgemm: public Routine<T> {
using Routine<T>::ErrorIn;
// Constructor
- Xgemm(Queue &queue, Event &event, const std::string &name = "GEMM");
+ Xgemm(Queue &queue, EventPointer event, const std::string &name = "GEMM");
// Templated-precision implementation of the routine
StatusCode DoGemm(const Layout layout, const Transpose a_transpose, const Transpose b_transpose,
diff --git a/include/internal/routines/level3/xhemm.h b/include/internal/routines/level3/xhemm.h
index ca38ca08..ec42b569 100644
--- a/include/internal/routines/level3/xhemm.h
+++ b/include/internal/routines/level3/xhemm.h
@@ -37,7 +37,7 @@ class Xhemm: public Xgemm<T> {
using Xgemm<T>::DoGemm;
// Constructor
- Xhemm(Queue &queue, Event &event, const std::string &name = "HEMM");
+ Xhemm(Queue &queue, EventPointer event, const std::string &name = "HEMM");
// Templated-precision implementation of the routine
StatusCode DoHemm(const Layout layout, const Side side, const Triangle triangle,
diff --git a/include/internal/routines/level3/xher2k.h b/include/internal/routines/level3/xher2k.h
index 7113a172..623afd49 100644
--- a/include/internal/routines/level3/xher2k.h
+++ b/include/internal/routines/level3/xher2k.h
@@ -30,6 +30,7 @@ class Xher2k: public Routine<T> {
using Routine<T>::db_;
using Routine<T>::source_string_;
using Routine<T>::queue_;
+ using Routine<T>::event_;
using Routine<T>::context_;
using Routine<T>::GetProgramFromCache;
using Routine<T>::PadCopyTransposeMatrix;
@@ -40,7 +41,7 @@ class Xher2k: public Routine<T> {
using Routine<T>::ErrorIn;
// Constructor
- Xher2k(Queue &queue, Event &event, const std::string &name = "HER2K");
+ Xher2k(Queue &queue, EventPointer event, const std::string &name = "HER2K");
// Templated-precision implementation of the routine
StatusCode DoHer2k(const Layout layout, const Triangle triangle, const Transpose ab_transpose,
diff --git a/include/internal/routines/level3/xherk.h b/include/internal/routines/level3/xherk.h
index 47112c2c..629695ff 100644
--- a/include/internal/routines/level3/xherk.h
+++ b/include/internal/routines/level3/xherk.h
@@ -30,6 +30,7 @@ class Xherk: public Routine<T> {
using Routine<T>::db_;
using Routine<T>::source_string_;
using Routine<T>::queue_;
+ using Routine<T>::event_;
using Routine<T>::context_;
using Routine<T>::GetProgramFromCache;
using Routine<T>::PadCopyTransposeMatrix;
@@ -39,7 +40,7 @@ class Xherk: public Routine<T> {
using Routine<T>::ErrorIn;
// Constructor
- Xherk(Queue &queue, Event &event, const std::string &name = "HERK");
+ Xherk(Queue &queue, EventPointer event, const std::string &name = "HERK");
// Templated-precision implementation of the routine
StatusCode DoHerk(const Layout layout, const Triangle triangle, const Transpose a_transpose,
diff --git a/include/internal/routines/level3/xsymm.h b/include/internal/routines/level3/xsymm.h
index 9fc80eb4..16ad6f53 100644
--- a/include/internal/routines/level3/xsymm.h
+++ b/include/internal/routines/level3/xsymm.h
@@ -39,7 +39,7 @@ class Xsymm: public Xgemm<T> {
using Xgemm<T>::DoGemm;
// Constructor
- Xsymm(Queue &queue, Event &event, const std::string &name = "SYMM");
+ Xsymm(Queue &queue, EventPointer event, const std::string &name = "SYMM");
// Templated-precision implementation of the routine
StatusCode DoSymm(const Layout layout, const Side side, const Triangle triangle,
diff --git a/include/internal/routines/level3/xsyr2k.h b/include/internal/routines/level3/xsyr2k.h
index c4679028..88669626 100644
--- a/include/internal/routines/level3/xsyr2k.h
+++ b/include/internal/routines/level3/xsyr2k.h
@@ -30,6 +30,7 @@ class Xsyr2k: public Routine<T> {
using Routine<T>::db_;
using Routine<T>::source_string_;
using Routine<T>::queue_;
+ using Routine<T>::event_;
using Routine<T>::context_;
using Routine<T>::GetProgramFromCache;
using Routine<T>::PadCopyTransposeMatrix;
@@ -40,7 +41,7 @@ class Xsyr2k: public Routine<T> {
using Routine<T>::ErrorIn;
// Constructor
- Xsyr2k(Queue &queue, Event &event, const std::string &name = "SYR2K");
+ Xsyr2k(Queue &queue, EventPointer event, const std::string &name = "SYR2K");
// Templated-precision implementation of the routine
StatusCode DoSyr2k(const Layout layout, const Triangle triangle, const Transpose ab_transpose,
diff --git a/include/internal/routines/level3/xsyrk.h b/include/internal/routines/level3/xsyrk.h
index abf6b681..e95c7c1c 100644
--- a/include/internal/routines/level3/xsyrk.h
+++ b/include/internal/routines/level3/xsyrk.h
@@ -32,6 +32,7 @@ class Xsyrk: public Routine<T> {
using Routine<T>::db_;
using Routine<T>::source_string_;
using Routine<T>::queue_;
+ using Routine<T>::event_;
using Routine<T>::context_;
using Routine<T>::GetProgramFromCache;
using Routine<T>::PadCopyTransposeMatrix;
@@ -41,7 +42,7 @@ class Xsyrk: public Routine<T> {
using Routine<T>::ErrorIn;
// Constructor
- Xsyrk(Queue &queue, Event &event, const std::string &name = "SYRK");
+ Xsyrk(Queue &queue, EventPointer event, const std::string &name = "SYRK");
// Templated-precision implementation of the routine
StatusCode DoSyrk(const Layout layout, const Triangle triangle, const Transpose a_transpose,
diff --git a/include/internal/routines/level3/xtrmm.h b/include/internal/routines/level3/xtrmm.h
index a1f4d15c..01f6594d 100644
--- a/include/internal/routines/level3/xtrmm.h
+++ b/include/internal/routines/level3/xtrmm.h
@@ -38,7 +38,7 @@ class Xtrmm: public Xgemm<T> {
using Xgemm<T>::DoGemm;
// Constructor
- Xtrmm(Queue &queue, Event &event, const std::string &name = "TRMM");
+ Xtrmm(Queue &queue, EventPointer event, const std::string &name = "TRMM");
// Templated-precision implementation of the routine
StatusCode DoTrmm(const Layout layout, const Side side, const Triangle triangle,