diff options
Diffstat (limited to 'include')
40 files changed, 106 insertions, 54 deletions
diff --git a/include/internal/clpp11.h b/include/internal/clpp11.h index 00905ef7..543d423a 100644 --- a/include/internal/clpp11.h +++ b/include/internal/clpp11.h @@ -73,29 +73,41 @@ class Event { public: // Constructor based on the regular OpenCL data-type - explicit Event(cl_event* event): event_(event) { } + explicit Event(const cl_event event): event_(event) { } + + // Regular constructor + explicit Event(): event_(nullptr) { } + + // Waits for completion of this event + void WaitForCompletion() const { + CheckError(clWaitForEvents(1, &event_)); + } // Retrieves the elapsed time of the last recorded event. Note that no error checking is done on // the 'clGetEventProfilingInfo' function, since there is a bug in Apple's OpenCL implementation: // http://stackoverflow.com/questions/26145603/clgeteventprofilinginfo-bug-in-macosx float GetElapsedTime() const { - CheckError(clWaitForEvents(1, event_)); + WaitForCompletion(); auto bytes = size_t{0}; - clGetEventProfilingInfo(*event_, CL_PROFILING_COMMAND_START, 0, nullptr, &bytes); + clGetEventProfilingInfo(event_, CL_PROFILING_COMMAND_START, 0, nullptr, &bytes); auto time_start = size_t{0}; - clGetEventProfilingInfo(*event_, CL_PROFILING_COMMAND_START, bytes, &time_start, nullptr); - clGetEventProfilingInfo(*event_, CL_PROFILING_COMMAND_END, 0, nullptr, &bytes); + clGetEventProfilingInfo(event_, CL_PROFILING_COMMAND_START, bytes, &time_start, nullptr); + clGetEventProfilingInfo(event_, CL_PROFILING_COMMAND_END, 0, nullptr, &bytes); auto time_end = size_t{0}; - clGetEventProfilingInfo(*event_, CL_PROFILING_COMMAND_END, bytes, &time_end, nullptr); + clGetEventProfilingInfo(event_, CL_PROFILING_COMMAND_END, bytes, &time_end, nullptr); return (time_end - time_start) * 1.0e-6f; } // Accessor to the private data-member - cl_event& operator()() { return *event_; } + cl_event& operator()() { return event_; } + cl_event* pointer() { return &event_; } private: - cl_event* event_; + cl_event event_; }; +// Pointer to an OpenCL event +using EventPointer = cl_event*; + // ================================================================================================= // C++11 version of 'cl_platform_id' @@ -600,17 +612,36 @@ class Kernel { // Launches a kernel onto the specified queue void Launch(const Queue &queue, const std::vector<size_t> &global, - const std::vector<size_t> &local, Event &event) { + const std::vector<size_t> &local, EventPointer event) { + CheckError(clEnqueueNDRangeKernel(queue(), *kernel_, static_cast<cl_uint>(global.size()), + nullptr, global.data(), local.data(), + 0, nullptr, event)); + } + + // As above, but with an event waiting list + void Launch(const Queue &queue, const std::vector<size_t> &global, + const std::vector<size_t> &local, EventPointer event, + std::vector<Event>& waitForEvents) { + if (waitForEvents.size() == 0) { return Launch(queue, global, local, event); } + + // Builds a plain version of the events waiting list + auto waitForEventsPlain = std::vector<cl_event>(); + for (auto &waitEvent : waitForEvents) { + waitForEventsPlain.push_back(waitEvent()); + } + + // Launches the kernel while waiting for other events CheckError(clEnqueueNDRangeKernel(queue(), *kernel_, static_cast<cl_uint>(global.size()), nullptr, global.data(), local.data(), - 0, nullptr, &(event()))); + waitForEventsPlain.size(), waitForEventsPlain.data(), + event)); } // As above, but with the default local workgroup size - void Launch(const Queue &queue, const std::vector<size_t> &global, Event &event) { + void Launch(const Queue &queue, const std::vector<size_t> &global, EventPointer event) { CheckError(clEnqueueNDRangeKernel(queue(), *kernel_, static_cast<cl_uint>(global.size()), nullptr, global.data(), nullptr, - 0, nullptr, &(event()))); + 0, nullptr, event)); } // Accessor to the private data-member diff --git a/include/internal/routine.h b/include/internal/routine.h index 5f5b8211..b2b6f622 100644 --- a/include/internal/routine.h +++ b/include/internal/routine.h @@ -55,7 +55,7 @@ class Routine { static constexpr bool ErrorIn(const StatusCode s) { return (s != StatusCode::kSuccess); } // Base class constructor - explicit Routine(Queue &queue, Event &event, const std::string &name, + explicit Routine(Queue &queue, EventPointer event, const std::string &name, const std::vector<std::string> &routines, const Precision precision); // Set-up phase of the kernel @@ -65,7 +65,12 @@ class Routine { // Runs a kernel given the global and local thread sizes StatusCode RunKernel(Kernel &kernel, std::vector<size_t> &global, - const std::vector<size_t> &local); + const std::vector<size_t> &local, EventPointer event, + std::vector<Event>& waitForEvents); + + // As above, but without an event waiting list + StatusCode RunKernel(Kernel &kernel, std::vector<size_t> &global, + const std::vector<size_t> &local, EventPointer event); // Tests for valid inputs of matrices A, B, and C StatusCode TestMatrixA(const size_t one, const size_t two, const Buffer<T> &buffer, @@ -87,7 +92,8 @@ class Routine { // Copies/transposes a matrix and padds/unpads it with zeroes. This method is also able to write // to symmetric and triangular matrices through optional arguments. - StatusCode PadCopyTransposeMatrix(const size_t src_one, const size_t src_two, + StatusCode PadCopyTransposeMatrix(EventPointer event, std::vector<Event>& waitForEvents, + const size_t src_one, const size_t src_two, const size_t src_ld, const size_t src_offset, const Buffer<T> &src, const size_t dest_one, const size_t dest_two, @@ -114,7 +120,7 @@ class Routine { // The OpenCL objects, accessible only from derived classes Queue queue_; - Event event_; + EventPointer event_; const Context context_; const Device device_; diff --git a/include/internal/routines/level1/xaxpy.h b/include/internal/routines/level1/xaxpy.h index 689cf169..bc00c8e3 100644 --- a/include/internal/routines/level1/xaxpy.h +++ b/include/internal/routines/level1/xaxpy.h @@ -28,6 +28,7 @@ class Xaxpy: public Routine<T> { using Routine<T>::db_; using Routine<T>::source_string_; using Routine<T>::queue_; + using Routine<T>::event_; using Routine<T>::GetProgramFromCache; using Routine<T>::TestVectorX; using Routine<T>::TestVectorY; @@ -35,7 +36,7 @@ class Xaxpy: public Routine<T> { using Routine<T>::ErrorIn; // Constructor - Xaxpy(Queue &queue, Event &event, const std::string &name = "AXPY"); + Xaxpy(Queue &queue, EventPointer event, const std::string &name = "AXPY"); // Templated-precision implementation of the routine StatusCode DoAxpy(const size_t n, const T alpha, diff --git a/include/internal/routines/level1/xcopy.h b/include/internal/routines/level1/xcopy.h index 15f339aa..5786cb0f 100644 --- a/include/internal/routines/level1/xcopy.h +++ b/include/internal/routines/level1/xcopy.h @@ -28,6 +28,7 @@ class Xcopy: public Routine<T> { using Routine<T>::db_; using Routine<T>::source_string_; using Routine<T>::queue_; + using Routine<T>::event_; using Routine<T>::GetProgramFromCache; using Routine<T>::TestVectorX; using Routine<T>::TestVectorY; @@ -35,7 +36,7 @@ class Xcopy: public Routine<T> { using Routine<T>::ErrorIn; // Constructor - Xcopy(Queue &queue, Event &event, const std::string &name = "COPY"); + Xcopy(Queue &queue, EventPointer event, const std::string &name = "COPY"); // Templated-precision implementation of the routine StatusCode DoCopy(const size_t n, diff --git a/include/internal/routines/level1/xdot.h b/include/internal/routines/level1/xdot.h index 64b62945..95a7ad07 100644 --- a/include/internal/routines/level1/xdot.h +++ b/include/internal/routines/level1/xdot.h @@ -28,6 +28,7 @@ class Xdot: public Routine<T> { using Routine<T>::db_; using Routine<T>::source_string_; using Routine<T>::queue_; + using Routine<T>::event_; using Routine<T>::context_; using Routine<T>::GetProgramFromCache; using Routine<T>::TestVectorX; @@ -37,7 +38,7 @@ class Xdot: public Routine<T> { using Routine<T>::ErrorIn; // Constructor - Xdot(Queue &queue, Event &event, const std::string &name = "DOT"); + Xdot(Queue &queue, EventPointer event, const std::string &name = "DOT"); // Templated-precision implementation of the routine StatusCode DoDot(const size_t n, diff --git a/include/internal/routines/level1/xdotc.h b/include/internal/routines/level1/xdotc.h index 726cec7c..0dc2cfe9 100644 --- a/include/internal/routines/level1/xdotc.h +++ b/include/internal/routines/level1/xdotc.h @@ -28,7 +28,7 @@ class Xdotc: public Xdot<T> { using Xdot<T>::DoDot; // Constructor - Xdotc(Queue &queue, Event &event, const std::string &name = "DOTC"); + Xdotc(Queue &queue, EventPointer event, const std::string &name = "DOTC"); // Templated-precision implementation of the routine StatusCode DoDotc(const size_t n, diff --git a/include/internal/routines/level1/xdotu.h b/include/internal/routines/level1/xdotu.h index 825ebb78..98988744 100644 --- a/include/internal/routines/level1/xdotu.h +++ b/include/internal/routines/level1/xdotu.h @@ -28,7 +28,7 @@ class Xdotu: public Xdot<T> { using Xdot<T>::DoDot; // Constructor - Xdotu(Queue &queue, Event &event, const std::string &name = "DOTU"); + Xdotu(Queue &queue, EventPointer event, const std::string &name = "DOTU"); // Templated-precision implementation of the routine StatusCode DoDotu(const size_t n, diff --git a/include/internal/routines/level1/xnrm2.h b/include/internal/routines/level1/xnrm2.h index b3fffef6..6f6ca74f 100644 --- a/include/internal/routines/level1/xnrm2.h +++ b/include/internal/routines/level1/xnrm2.h @@ -28,6 +28,7 @@ class Xnrm2: public Routine<T> { using Routine<T>::db_; using Routine<T>::source_string_; using Routine<T>::queue_; + using Routine<T>::event_; using Routine<T>::context_; using Routine<T>::GetProgramFromCache; using Routine<T>::TestVectorX; @@ -36,7 +37,7 @@ class Xnrm2: public Routine<T> { using Routine<T>::ErrorIn; // Constructor - Xnrm2(Queue &queue, Event &event, const std::string &name = "NRM2"); + Xnrm2(Queue &queue, EventPointer event, const std::string &name = "NRM2"); // Templated-precision implementation of the routine StatusCode DoNrm2(const size_t n, diff --git a/include/internal/routines/level1/xscal.h b/include/internal/routines/level1/xscal.h index d97b5a07..e10a201d 100644 --- a/include/internal/routines/level1/xscal.h +++ b/include/internal/routines/level1/xscal.h @@ -28,13 +28,14 @@ class Xscal: public Routine<T> { using Routine<T>::db_; using Routine<T>::source_string_; using Routine<T>::queue_; + using Routine<T>::event_; using Routine<T>::GetProgramFromCache; using Routine<T>::TestVectorX; using Routine<T>::RunKernel; using Routine<T>::ErrorIn; // Constructor - Xscal(Queue &queue, Event &event, const std::string &name = "SCAL"); + Xscal(Queue &queue, EventPointer event, const std::string &name = "SCAL"); // Templated-precision implementation of the routine StatusCode DoScal(const size_t n, const T alpha, diff --git a/include/internal/routines/level1/xswap.h b/include/internal/routines/level1/xswap.h index fe79882b..0f240763 100644 --- a/include/internal/routines/level1/xswap.h +++ b/include/internal/routines/level1/xswap.h @@ -28,6 +28,7 @@ class Xswap: public Routine<T> { using Routine<T>::db_; using Routine<T>::source_string_; using Routine<T>::queue_; + using Routine<T>::event_; using Routine<T>::GetProgramFromCache; using Routine<T>::TestVectorX; using Routine<T>::TestVectorY; @@ -35,7 +36,7 @@ class Xswap: public Routine<T> { using Routine<T>::ErrorIn; // Constructor - Xswap(Queue &queue, Event &event, const std::string &name = "SWAP"); + Xswap(Queue &queue, EventPointer event, const std::string &name = "SWAP"); // Templated-precision implementation of the routine StatusCode DoSwap(const size_t n, diff --git a/include/internal/routines/level2/xgbmv.h b/include/internal/routines/level2/xgbmv.h index 27b033e9..bc94c77d 100644 --- a/include/internal/routines/level2/xgbmv.h +++ b/include/internal/routines/level2/xgbmv.h @@ -30,7 +30,7 @@ class Xgbmv: public Xgemv<T> { using Xgemv<T>::MatVec; // Constructor - Xgbmv(Queue &queue, Event &event, const std::string &name = "GBMV"); + Xgbmv(Queue &queue, EventPointer event, const std::string &name = "GBMV"); // Templated-precision implementation of the routine StatusCode DoGbmv(const Layout layout, const Transpose a_transpose, diff --git a/include/internal/routines/level2/xgemv.h b/include/internal/routines/level2/xgemv.h index b31565ec..0b2a8e66 100644 --- a/include/internal/routines/level2/xgemv.h +++ b/include/internal/routines/level2/xgemv.h @@ -28,6 +28,7 @@ class Xgemv: public Routine<T> { using Routine<T>::db_; using Routine<T>::source_string_; using Routine<T>::queue_; + using Routine<T>::event_; using Routine<T>::GetProgramFromCache; using Routine<T>::TestVectorX; using Routine<T>::TestVectorY; @@ -37,7 +38,7 @@ class Xgemv: public Routine<T> { using Routine<T>::ErrorIn; // Constructor - Xgemv(Queue &queue, Event &event, const std::string &name = "GEMV"); + Xgemv(Queue &queue, EventPointer event, const std::string &name = "GEMV"); // Templated-precision implementation of the routine StatusCode DoGemv(const Layout layout, const Transpose a_transpose, diff --git a/include/internal/routines/level2/xger.h b/include/internal/routines/level2/xger.h index 45ecea10..5ace9da6 100644 --- a/include/internal/routines/level2/xger.h +++ b/include/internal/routines/level2/xger.h @@ -28,6 +28,7 @@ class Xger: public Routine<T> { using Routine<T>::db_; using Routine<T>::source_string_; using Routine<T>::queue_; + using Routine<T>::event_; using Routine<T>::GetProgramFromCache; using Routine<T>::TestVectorX; using Routine<T>::TestVectorY; @@ -36,7 +37,7 @@ class Xger: public Routine<T> { using Routine<T>::ErrorIn; // Constructor - Xger(Queue &queue, Event &event, const std::string &name = "GER"); + Xger(Queue &queue, EventPointer event, const std::string &name = "GER"); // Templated-precision implementation of the routine StatusCode DoGer(const Layout layout, diff --git a/include/internal/routines/level2/xgerc.h b/include/internal/routines/level2/xgerc.h index 8e515a14..6d06ef94 100644 --- a/include/internal/routines/level2/xgerc.h +++ b/include/internal/routines/level2/xgerc.h @@ -28,7 +28,7 @@ class Xgerc: public Xger<T> { using Xger<T>::DoGer; // Constructor - Xgerc(Queue &queue, Event &event, const std::string &name = "GERC"); + Xgerc(Queue &queue, EventPointer event, const std::string &name = "GERC"); // Templated-precision implementation of the routine StatusCode DoGerc(const Layout layout, diff --git a/include/internal/routines/level2/xgeru.h b/include/internal/routines/level2/xgeru.h index ec485c37..45ce1cba 100644 --- a/include/internal/routines/level2/xgeru.h +++ b/include/internal/routines/level2/xgeru.h @@ -28,7 +28,7 @@ class Xgeru: public Xger<T> { using Xger<T>::DoGer; // Constructor - Xgeru(Queue &queue, Event &event, const std::string &name = "GERU"); + Xgeru(Queue &queue, EventPointer event, const std::string &name = "GERU"); // Templated-precision implementation of the routine StatusCode DoGeru(const Layout layout, diff --git a/include/internal/routines/level2/xhbmv.h b/include/internal/routines/level2/xhbmv.h index 65138424..f0a6212c 100644 --- a/include/internal/routines/level2/xhbmv.h +++ b/include/internal/routines/level2/xhbmv.h @@ -30,7 +30,7 @@ class Xhbmv: public Xgemv<T> { using Xgemv<T>::MatVec; // Constructor - Xhbmv(Queue &queue, Event &event, const std::string &name = "HBMV"); + Xhbmv(Queue &queue, EventPointer event, const std::string &name = "HBMV"); // Templated-precision implementation of the routine StatusCode DoHbmv(const Layout layout, const Triangle triangle, diff --git a/include/internal/routines/level2/xhemv.h b/include/internal/routines/level2/xhemv.h index b74db760..3daf2457 100644 --- a/include/internal/routines/level2/xhemv.h +++ b/include/internal/routines/level2/xhemv.h @@ -30,7 +30,7 @@ class Xhemv: public Xgemv<T> { using Xgemv<T>::MatVec; // Constructor - Xhemv(Queue &queue, Event &event, const std::string &name = "HEMV"); + Xhemv(Queue &queue, EventPointer event, const std::string &name = "HEMV"); // Templated-precision implementation of the routine StatusCode DoHemv(const Layout layout, const Triangle triangle, diff --git a/include/internal/routines/level2/xher.h b/include/internal/routines/level2/xher.h index 6322265b..861ba302 100644 --- a/include/internal/routines/level2/xher.h +++ b/include/internal/routines/level2/xher.h @@ -28,6 +28,7 @@ class Xher: public Routine<T> { using Routine<T>::db_; using Routine<T>::source_string_; using Routine<T>::queue_; + using Routine<T>::event_; using Routine<T>::GetProgramFromCache; using Routine<T>::TestVectorX; using Routine<T>::TestMatrixA; @@ -36,7 +37,7 @@ class Xher: public Routine<T> { using Routine<T>::ErrorIn; // Constructor - Xher(Queue &queue, Event &event, const std::string &name = "HER"); + Xher(Queue &queue, EventPointer event, const std::string &name = "HER"); // Translates alpha of type 'U' into type 'T' T GetAlpha(const U alpha); diff --git a/include/internal/routines/level2/xher2.h b/include/internal/routines/level2/xher2.h index 26f69046..9a23199e 100644 --- a/include/internal/routines/level2/xher2.h +++ b/include/internal/routines/level2/xher2.h @@ -28,6 +28,7 @@ class Xher2: public Routine<T> { using Routine<T>::db_; using Routine<T>::source_string_; using Routine<T>::queue_; + using Routine<T>::event_; using Routine<T>::GetProgramFromCache; using Routine<T>::TestVectorX; using Routine<T>::TestVectorY; @@ -37,7 +38,7 @@ class Xher2: public Routine<T> { using Routine<T>::ErrorIn; // Constructor - Xher2(Queue &queue, Event &event, const std::string &name = "HER2"); + Xher2(Queue &queue, EventPointer event, const std::string &name = "HER2"); // Templated-precision implementation of the routine StatusCode DoHer2(const Layout layout, const Triangle triangle, diff --git a/include/internal/routines/level2/xhpmv.h b/include/internal/routines/level2/xhpmv.h index 48f1ed3f..a1d5595a 100644 --- a/include/internal/routines/level2/xhpmv.h +++ b/include/internal/routines/level2/xhpmv.h @@ -30,7 +30,7 @@ class Xhpmv: public Xgemv<T> { using Xgemv<T>::MatVec; // Constructor - Xhpmv(Queue &queue, Event &event, const std::string &name = "HPMV"); + Xhpmv(Queue &queue, EventPointer event, const std::string &name = "HPMV"); // Templated-precision implementation of the routine StatusCode DoHpmv(const Layout layout, const Triangle triangle, diff --git a/include/internal/routines/level2/xhpr.h b/include/internal/routines/level2/xhpr.h index a0c3cb92..6554d74c 100644 --- a/include/internal/routines/level2/xhpr.h +++ b/include/internal/routines/level2/xhpr.h @@ -28,7 +28,7 @@ class Xhpr: public Xher<T,U> { using Xher<T,U>::DoHer; // Constructor - Xhpr(Queue &queue, Event &event, const std::string &name = "HPR"); + Xhpr(Queue &queue, EventPointer event, const std::string &name = "HPR"); // Templated-precision implementation of the routine StatusCode DoHpr(const Layout layout, const Triangle triangle, diff --git a/include/internal/routines/level2/xhpr2.h b/include/internal/routines/level2/xhpr2.h index fd243d33..d95e7b61 100644 --- a/include/internal/routines/level2/xhpr2.h +++ b/include/internal/routines/level2/xhpr2.h @@ -28,7 +28,7 @@ class Xhpr2: public Xher2<T> { using Xher2<T>::DoHer2; // Constructor - Xhpr2(Queue &queue, Event &event, const std::string &name = "HPR2"); + Xhpr2(Queue &queue, EventPointer event, const std::string &name = "HPR2"); // Templated-precision implementation of the routine StatusCode DoHpr2(const Layout layout, const Triangle triangle, diff --git a/include/internal/routines/level2/xsbmv.h b/include/internal/routines/level2/xsbmv.h index bb24d8f4..4328e377 100644 --- a/include/internal/routines/level2/xsbmv.h +++ b/include/internal/routines/level2/xsbmv.h @@ -30,7 +30,7 @@ class Xsbmv: public Xgemv<T> { using Xgemv<T>::MatVec; // Constructor - Xsbmv(Queue &queue, Event &event, const std::string &name = "SBMV"); + Xsbmv(Queue &queue, EventPointer event, const std::string &name = "SBMV"); // Templated-precision implementation of the routine StatusCode DoSbmv(const Layout layout, const Triangle triangle, diff --git a/include/internal/routines/level2/xspmv.h b/include/internal/routines/level2/xspmv.h index 88f02a2f..ca3e28b6 100644 --- a/include/internal/routines/level2/xspmv.h +++ b/include/internal/routines/level2/xspmv.h @@ -30,7 +30,7 @@ class Xspmv: public Xgemv<T> { using Xgemv<T>::MatVec; // Constructor - Xspmv(Queue &queue, Event &event, const std::string &name = "SPMV"); + Xspmv(Queue &queue, EventPointer event, const std::string &name = "SPMV"); // Templated-precision implementation of the routine StatusCode DoSpmv(const Layout layout, const Triangle triangle, diff --git a/include/internal/routines/level2/xspr.h b/include/internal/routines/level2/xspr.h index 5b01d2cb..7e91abc5 100644 --- a/include/internal/routines/level2/xspr.h +++ b/include/internal/routines/level2/xspr.h @@ -28,7 +28,7 @@ class Xspr: public Xher<T,T> { using Xher<T,T>::DoHer; // Constructor - Xspr(Queue &queue, Event &event, const std::string &name = "SPR"); + Xspr(Queue &queue, EventPointer event, const std::string &name = "SPR"); // Templated-precision implementation of the routine StatusCode DoSpr(const Layout layout, const Triangle triangle, diff --git a/include/internal/routines/level2/xspr2.h b/include/internal/routines/level2/xspr2.h index 3d5f4992..a34be8e8 100644 --- a/include/internal/routines/level2/xspr2.h +++ b/include/internal/routines/level2/xspr2.h @@ -28,7 +28,7 @@ class Xspr2: public Xher2<T> { using Xher2<T>::DoHer2; // Constructor - Xspr2(Queue &queue, Event &event, const std::string &name = "SPR2"); + Xspr2(Queue &queue, EventPointer event, const std::string &name = "SPR2"); // Templated-precision implementation of the routine StatusCode DoSpr2(const Layout layout, const Triangle triangle, diff --git a/include/internal/routines/level2/xsymv.h b/include/internal/routines/level2/xsymv.h index c7b92702..98a0ce88 100644 --- a/include/internal/routines/level2/xsymv.h +++ b/include/internal/routines/level2/xsymv.h @@ -30,7 +30,7 @@ class Xsymv: public Xgemv<T> { using Xgemv<T>::MatVec; // Constructor - Xsymv(Queue &queue, Event &event, const std::string &name = "SYMV"); + Xsymv(Queue &queue, EventPointer event, const std::string &name = "SYMV"); // Templated-precision implementation of the routine StatusCode DoSymv(const Layout layout, const Triangle triangle, diff --git a/include/internal/routines/level2/xsyr.h b/include/internal/routines/level2/xsyr.h index 9704a881..f88498ae 100644 --- a/include/internal/routines/level2/xsyr.h +++ b/include/internal/routines/level2/xsyr.h @@ -28,7 +28,7 @@ class Xsyr: public Xher<T,T> { using Xher<T,T>::DoHer; // Constructor - Xsyr(Queue &queue, Event &event, const std::string &name = "SYR"); + Xsyr(Queue &queue, EventPointer event, const std::string &name = "SYR"); // Templated-precision implementation of the routine StatusCode DoSyr(const Layout layout, const Triangle triangle, diff --git a/include/internal/routines/level2/xsyr2.h b/include/internal/routines/level2/xsyr2.h index f4dc9375..d2d3143a 100644 --- a/include/internal/routines/level2/xsyr2.h +++ b/include/internal/routines/level2/xsyr2.h @@ -28,7 +28,7 @@ class Xsyr2: public Xher2<T> { using Xher2<T>::DoHer2; // Constructor - Xsyr2(Queue &queue, Event &event, const std::string &name = "SYR2"); + Xsyr2(Queue &queue, EventPointer event, const std::string &name = "SYR2"); // Templated-precision implementation of the routine StatusCode DoSyr2(const Layout layout, const Triangle triangle, diff --git a/include/internal/routines/level2/xtbmv.h b/include/internal/routines/level2/xtbmv.h index 89c90193..3b358080 100644 --- a/include/internal/routines/level2/xtbmv.h +++ b/include/internal/routines/level2/xtbmv.h @@ -34,7 +34,7 @@ class Xtbmv: public Xgemv<T> { using Xgemv<T>::MatVec; // Constructor - Xtbmv(Queue &queue, Event &event, const std::string &name = "TBMV"); + Xtbmv(Queue &queue, EventPointer event, const std::string &name = "TBMV"); // Templated-precision implementation of the routine StatusCode DoTbmv(const Layout layout, const Triangle triangle, diff --git a/include/internal/routines/level2/xtpmv.h b/include/internal/routines/level2/xtpmv.h index 183d3505..f306cf4a 100644 --- a/include/internal/routines/level2/xtpmv.h +++ b/include/internal/routines/level2/xtpmv.h @@ -34,7 +34,7 @@ class Xtpmv: public Xgemv<T> { using Xgemv<T>::MatVec; // Constructor - Xtpmv(Queue &queue, Event &event, const std::string &name = "TPMV"); + Xtpmv(Queue &queue, EventPointer event, const std::string &name = "TPMV"); // Templated-precision implementation of the routine StatusCode DoTpmv(const Layout layout, const Triangle triangle, diff --git a/include/internal/routines/level2/xtrmv.h b/include/internal/routines/level2/xtrmv.h index dadfbc98..cf0824a4 100644 --- a/include/internal/routines/level2/xtrmv.h +++ b/include/internal/routines/level2/xtrmv.h @@ -34,7 +34,7 @@ class Xtrmv: public Xgemv<T> { using Xgemv<T>::MatVec; // Constructor - Xtrmv(Queue &queue, Event &event, const std::string &name = "TRMV"); + Xtrmv(Queue &queue, EventPointer event, const std::string &name = "TRMV"); // Templated-precision implementation of the routine StatusCode DoTrmv(const Layout layout, const Triangle triangle, diff --git a/include/internal/routines/level3/xgemm.h b/include/internal/routines/level3/xgemm.h index 9b40a7fc..85fb0616 100644 --- a/include/internal/routines/level3/xgemm.h +++ b/include/internal/routines/level3/xgemm.h @@ -28,6 +28,7 @@ class Xgemm: public Routine<T> { using Routine<T>::db_; using Routine<T>::source_string_; using Routine<T>::queue_; + using Routine<T>::event_; using Routine<T>::context_; using Routine<T>::GetProgramFromCache; using Routine<T>::PadCopyTransposeMatrix; @@ -38,7 +39,7 @@ class Xgemm: public Routine<T> { using Routine<T>::ErrorIn; // Constructor - Xgemm(Queue &queue, Event &event, const std::string &name = "GEMM"); + Xgemm(Queue &queue, EventPointer event, const std::string &name = "GEMM"); // Templated-precision implementation of the routine StatusCode DoGemm(const Layout layout, const Transpose a_transpose, const Transpose b_transpose, diff --git a/include/internal/routines/level3/xhemm.h b/include/internal/routines/level3/xhemm.h index ca38ca08..ec42b569 100644 --- a/include/internal/routines/level3/xhemm.h +++ b/include/internal/routines/level3/xhemm.h @@ -37,7 +37,7 @@ class Xhemm: public Xgemm<T> { using Xgemm<T>::DoGemm; // Constructor - Xhemm(Queue &queue, Event &event, const std::string &name = "HEMM"); + Xhemm(Queue &queue, EventPointer event, const std::string &name = "HEMM"); // Templated-precision implementation of the routine StatusCode DoHemm(const Layout layout, const Side side, const Triangle triangle, diff --git a/include/internal/routines/level3/xher2k.h b/include/internal/routines/level3/xher2k.h index 7113a172..623afd49 100644 --- a/include/internal/routines/level3/xher2k.h +++ b/include/internal/routines/level3/xher2k.h @@ -30,6 +30,7 @@ class Xher2k: public Routine<T> { using Routine<T>::db_; using Routine<T>::source_string_; using Routine<T>::queue_; + using Routine<T>::event_; using Routine<T>::context_; using Routine<T>::GetProgramFromCache; using Routine<T>::PadCopyTransposeMatrix; @@ -40,7 +41,7 @@ class Xher2k: public Routine<T> { using Routine<T>::ErrorIn; // Constructor - Xher2k(Queue &queue, Event &event, const std::string &name = "HER2K"); + Xher2k(Queue &queue, EventPointer event, const std::string &name = "HER2K"); // Templated-precision implementation of the routine StatusCode DoHer2k(const Layout layout, const Triangle triangle, const Transpose ab_transpose, diff --git a/include/internal/routines/level3/xherk.h b/include/internal/routines/level3/xherk.h index 47112c2c..629695ff 100644 --- a/include/internal/routines/level3/xherk.h +++ b/include/internal/routines/level3/xherk.h @@ -30,6 +30,7 @@ class Xherk: public Routine<T> { using Routine<T>::db_; using Routine<T>::source_string_; using Routine<T>::queue_; + using Routine<T>::event_; using Routine<T>::context_; using Routine<T>::GetProgramFromCache; using Routine<T>::PadCopyTransposeMatrix; @@ -39,7 +40,7 @@ class Xherk: public Routine<T> { using Routine<T>::ErrorIn; // Constructor - Xherk(Queue &queue, Event &event, const std::string &name = "HERK"); + Xherk(Queue &queue, EventPointer event, const std::string &name = "HERK"); // Templated-precision implementation of the routine StatusCode DoHerk(const Layout layout, const Triangle triangle, const Transpose a_transpose, diff --git a/include/internal/routines/level3/xsymm.h b/include/internal/routines/level3/xsymm.h index 9fc80eb4..16ad6f53 100644 --- a/include/internal/routines/level3/xsymm.h +++ b/include/internal/routines/level3/xsymm.h @@ -39,7 +39,7 @@ class Xsymm: public Xgemm<T> { using Xgemm<T>::DoGemm; // Constructor - Xsymm(Queue &queue, Event &event, const std::string &name = "SYMM"); + Xsymm(Queue &queue, EventPointer event, const std::string &name = "SYMM"); // Templated-precision implementation of the routine StatusCode DoSymm(const Layout layout, const Side side, const Triangle triangle, diff --git a/include/internal/routines/level3/xsyr2k.h b/include/internal/routines/level3/xsyr2k.h index c4679028..88669626 100644 --- a/include/internal/routines/level3/xsyr2k.h +++ b/include/internal/routines/level3/xsyr2k.h @@ -30,6 +30,7 @@ class Xsyr2k: public Routine<T> { using Routine<T>::db_; using Routine<T>::source_string_; using Routine<T>::queue_; + using Routine<T>::event_; using Routine<T>::context_; using Routine<T>::GetProgramFromCache; using Routine<T>::PadCopyTransposeMatrix; @@ -40,7 +41,7 @@ class Xsyr2k: public Routine<T> { using Routine<T>::ErrorIn; // Constructor - Xsyr2k(Queue &queue, Event &event, const std::string &name = "SYR2K"); + Xsyr2k(Queue &queue, EventPointer event, const std::string &name = "SYR2K"); // Templated-precision implementation of the routine StatusCode DoSyr2k(const Layout layout, const Triangle triangle, const Transpose ab_transpose, diff --git a/include/internal/routines/level3/xsyrk.h b/include/internal/routines/level3/xsyrk.h index abf6b681..e95c7c1c 100644 --- a/include/internal/routines/level3/xsyrk.h +++ b/include/internal/routines/level3/xsyrk.h @@ -32,6 +32,7 @@ class Xsyrk: public Routine<T> { using Routine<T>::db_; using Routine<T>::source_string_; using Routine<T>::queue_; + using Routine<T>::event_; using Routine<T>::context_; using Routine<T>::GetProgramFromCache; using Routine<T>::PadCopyTransposeMatrix; @@ -41,7 +42,7 @@ class Xsyrk: public Routine<T> { using Routine<T>::ErrorIn; // Constructor - Xsyrk(Queue &queue, Event &event, const std::string &name = "SYRK"); + Xsyrk(Queue &queue, EventPointer event, const std::string &name = "SYRK"); // Templated-precision implementation of the routine StatusCode DoSyrk(const Layout layout, const Triangle triangle, const Transpose a_transpose, diff --git a/include/internal/routines/level3/xtrmm.h b/include/internal/routines/level3/xtrmm.h index a1f4d15c..01f6594d 100644 --- a/include/internal/routines/level3/xtrmm.h +++ b/include/internal/routines/level3/xtrmm.h @@ -38,7 +38,7 @@ class Xtrmm: public Xgemm<T> { using Xgemm<T>::DoGemm; // Constructor - Xtrmm(Queue &queue, Event &event, const std::string &name = "TRMM"); + Xtrmm(Queue &queue, EventPointer event, const std::string &name = "TRMM"); // Templated-precision implementation of the routine StatusCode DoTrmm(const Layout layout, const Side side, const Triangle triangle, |