diff options
author | cnugteren <web@cedricnugteren.nl> | 2016-04-09 22:22:24 -0600 |
---|---|---|
committer | cnugteren <web@cedricnugteren.nl> | 2016-04-09 22:22:24 -0600 |
commit | 1d3d38a2618c5663bf1549b08805137fd85f2efa (patch) | |
tree | 5de200346fc3d87c6e353d84744c59b2c703c16d /src/routines/level1 | |
parent | c2cfee76c4d8f7486d5b62b3e0a878867a32a070 (diff) |
Events are now properly implemented by using an event waiting list and by asking the user to wait for event completion
Diffstat (limited to 'src/routines/level1')
-rw-r--r-- | src/routines/level1/xaxpy.cc | 6 | ||||
-rw-r--r-- | src/routines/level1/xcopy.cc | 6 | ||||
-rw-r--r-- | src/routines/level1/xdot.cc | 11 | ||||
-rw-r--r-- | src/routines/level1/xdotc.cc | 2 | ||||
-rw-r--r-- | src/routines/level1/xdotu.cc | 2 | ||||
-rw-r--r-- | src/routines/level1/xnrm2.cc | 10 | ||||
-rw-r--r-- | src/routines/level1/xscal.cc | 6 | ||||
-rw-r--r-- | src/routines/level1/xswap.cc | 6 |
8 files changed, 29 insertions, 20 deletions
diff --git a/src/routines/level1/xaxpy.cc b/src/routines/level1/xaxpy.cc index c5acaf49..37d23543 100644 --- a/src/routines/level1/xaxpy.cc +++ b/src/routines/level1/xaxpy.cc @@ -29,7 +29,7 @@ template <> const Precision Xaxpy<double2>::precision_ = Precision::kComplexDoub // Constructor: forwards to base class constructor template <typename T> -Xaxpy<T>::Xaxpy(Queue &queue, Event &event, const std::string &name): +Xaxpy<T>::Xaxpy(Queue &queue, EventPointer event, const std::string &name): Routine<T>(queue, event, name, {"Xaxpy"}, precision_) { source_string_ = #include "../../kernels/level1/level1.opencl" @@ -89,13 +89,13 @@ StatusCode Xaxpy<T>::DoAxpy(const size_t n, const T alpha, if (use_fast_kernel) { auto global = std::vector<size_t>{CeilDiv(n, db_["WPT"]*db_["VW"])}; auto local = std::vector<size_t>{db_["WGS"]}; - status = RunKernel(kernel, global, local); + status = RunKernel(kernel, global, local, event_); } else { auto n_ceiled = Ceil(n, db_["WGS"]*db_["WPT"]); auto global = std::vector<size_t>{n_ceiled/db_["WPT"]}; auto local = std::vector<size_t>{db_["WGS"]}; - status = RunKernel(kernel, global, local); + status = RunKernel(kernel, global, local, event_); } if (ErrorIn(status)) { return status; } diff --git a/src/routines/level1/xcopy.cc b/src/routines/level1/xcopy.cc index 8c7f8671..04508383 100644 --- a/src/routines/level1/xcopy.cc +++ b/src/routines/level1/xcopy.cc @@ -29,7 +29,7 @@ template <> const Precision Xcopy<double2>::precision_ = Precision::kComplexDoub // Constructor: forwards to base class constructor template <typename T> -Xcopy<T>::Xcopy(Queue &queue, Event &event, const std::string &name): +Xcopy<T>::Xcopy(Queue &queue, EventPointer event, const std::string &name): Routine<T>(queue, event, name, {"Xaxpy"}, precision_) { source_string_ = #include "../../kernels/level1/level1.opencl" @@ -87,13 +87,13 @@ StatusCode Xcopy<T>::DoCopy(const size_t n, if (use_fast_kernel) { auto global = std::vector<size_t>{CeilDiv(n, db_["WPT"]*db_["VW"])}; 
auto local = std::vector<size_t>{db_["WGS"]}; - status = RunKernel(kernel, global, local); + status = RunKernel(kernel, global, local, event_); } else { auto n_ceiled = Ceil(n, db_["WGS"]*db_["WPT"]); auto global = std::vector<size_t>{n_ceiled/db_["WPT"]}; auto local = std::vector<size_t>{db_["WGS"]}; - status = RunKernel(kernel, global, local); + status = RunKernel(kernel, global, local, event_); } if (ErrorIn(status)) { return status; } diff --git a/src/routines/level1/xdot.cc b/src/routines/level1/xdot.cc index e22b0f8b..4813a004 100644 --- a/src/routines/level1/xdot.cc +++ b/src/routines/level1/xdot.cc @@ -29,7 +29,7 @@ template <> const Precision Xdot<double2>::precision_ = Precision::kComplexDoubl // Constructor: forwards to base class constructor template <typename T> -Xdot<T>::Xdot(Queue &queue, Event &event, const std::string &name): +Xdot<T>::Xdot(Queue &queue, EventPointer event, const std::string &name): Routine<T>(queue, event, name, {"Xdot"}, precision_) { source_string_ = #include "../../kernels/level1/xdot.opencl" @@ -78,11 +78,16 @@ StatusCode Xdot<T>::DoDot(const size_t n, kernel1.SetArgument(7, temp_buffer()); kernel1.SetArgument(8, static_cast<int>(do_conjugate)); + // Event waiting list + auto eventWaitList = std::vector<Event>(); + // Launches the main kernel auto global1 = std::vector<size_t>{db_["WGS1"]*temp_size}; auto local1 = std::vector<size_t>{db_["WGS1"]}; - status = RunKernel(kernel1, global1, local1); + auto kernelEvent = Event(); + status = RunKernel(kernel1, global1, local1, kernelEvent.pointer()); if (ErrorIn(status)) { return status; } + eventWaitList.push_back(kernelEvent); // Sets the arguments for the epilogue kernel kernel2.SetArgument(0, temp_buffer()); @@ -92,7 +97,7 @@ StatusCode Xdot<T>::DoDot(const size_t n, // Launches the epilogue kernel auto global2 = std::vector<size_t>{db_["WGS2"]}; auto local2 = std::vector<size_t>{db_["WGS2"]}; - status = RunKernel(kernel2, global2, local2); + status = RunKernel(kernel2, global2, 
local2, event_, eventWaitList); if (ErrorIn(status)) { return status; } // Succesfully finished the computation diff --git a/src/routines/level1/xdotc.cc b/src/routines/level1/xdotc.cc index f414f556..b3a01079 100644 --- a/src/routines/level1/xdotc.cc +++ b/src/routines/level1/xdotc.cc @@ -21,7 +21,7 @@ namespace clblast { // Constructor: forwards to base class constructor template <typename T> -Xdotc<T>::Xdotc(Queue &queue, Event &event, const std::string &name): +Xdotc<T>::Xdotc(Queue &queue, EventPointer event, const std::string &name): Xdot<T>(queue, event, name) { } diff --git a/src/routines/level1/xdotu.cc b/src/routines/level1/xdotu.cc index 28d9b730..8dded6e0 100644 --- a/src/routines/level1/xdotu.cc +++ b/src/routines/level1/xdotu.cc @@ -20,7 +20,7 @@ namespace clblast { // Constructor: forwards to base class constructor template <typename T> -Xdotu<T>::Xdotu(Queue &queue, Event &event, const std::string &name): +Xdotu<T>::Xdotu(Queue &queue, EventPointer event, const std::string &name): Xdot<T>(queue, event, name) { } diff --git a/src/routines/level1/xnrm2.cc b/src/routines/level1/xnrm2.cc index 685eb29f..04e4137c 100644 --- a/src/routines/level1/xnrm2.cc +++ b/src/routines/level1/xnrm2.cc @@ -29,7 +29,7 @@ template <> const Precision Xnrm2<double2>::precision_ = Precision::kComplexDoub // Constructor: forwards to base class constructor template <typename T> -Xnrm2<T>::Xnrm2(Queue &queue, Event &event, const std::string &name): +Xnrm2<T>::Xnrm2(Queue &queue, EventPointer event, const std::string &name): Routine<T>(queue, event, name, {"Xdot"}, precision_) { source_string_ = #include "../../kernels/level1/xnrm2.opencl" @@ -69,12 +69,16 @@ StatusCode Xnrm2<T>::DoNrm2(const size_t n, kernel1.SetArgument(2, static_cast<int>(x_offset)); kernel1.SetArgument(3, static_cast<int>(x_inc)); kernel1.SetArgument(4, temp_buffer()); + // Event waiting list + auto eventWaitList = std::vector<Event>(); // Launches the main kernel auto global1 = 
std::vector<size_t>{db_["WGS1"]*temp_size}; auto local1 = std::vector<size_t>{db_["WGS1"]}; - status = RunKernel(kernel1, global1, local1); + auto kernelEvent = Event(); + status = RunKernel(kernel1, global1, local1, kernelEvent.pointer()); if (ErrorIn(status)) { return status; } + eventWaitList.push_back(kernelEvent); // Sets the arguments for the epilogue kernel kernel2.SetArgument(0, temp_buffer()); @@ -84,7 +88,7 @@ StatusCode Xnrm2<T>::DoNrm2(const size_t n, // Launches the epilogue kernel auto global2 = std::vector<size_t>{db_["WGS2"]}; auto local2 = std::vector<size_t>{db_["WGS2"]}; - status = RunKernel(kernel2, global2, local2); + status = RunKernel(kernel2, global2, local2, event_, eventWaitList); if (ErrorIn(status)) { return status; } // Succesfully finished the computation diff --git a/src/routines/level1/xscal.cc b/src/routines/level1/xscal.cc index 57bbe9e8..e83e73fd 100644 --- a/src/routines/level1/xscal.cc +++ b/src/routines/level1/xscal.cc @@ -29,7 +29,7 @@ template <> const Precision Xscal<double2>::precision_ = Precision::kComplexDoub // Constructor: forwards to base class constructor template <typename T> -Xscal<T>::Xscal(Queue &queue, Event &event, const std::string &name): +Xscal<T>::Xscal(Queue &queue, EventPointer event, const std::string &name): Routine<T>(queue, event, name, {"Xaxpy"}, precision_) { source_string_ = #include "../../kernels/level1/level1.opencl" @@ -81,13 +81,13 @@ StatusCode Xscal<T>::DoScal(const size_t n, const T alpha, if (use_fast_kernel) { auto global = std::vector<size_t>{CeilDiv(n, db_["WPT"]*db_["VW"])}; auto local = std::vector<size_t>{db_["WGS"]}; - status = RunKernel(kernel, global, local); + status = RunKernel(kernel, global, local, event_); } else { auto n_ceiled = Ceil(n, db_["WGS"]*db_["WPT"]); auto global = std::vector<size_t>{n_ceiled/db_["WPT"]}; auto local = std::vector<size_t>{db_["WGS"]}; - status = RunKernel(kernel, global, local); + status = RunKernel(kernel, global, local, event_); } if 
(ErrorIn(status)) { return status; } diff --git a/src/routines/level1/xswap.cc b/src/routines/level1/xswap.cc index c986b3fb..bc425f40 100644 --- a/src/routines/level1/xswap.cc +++ b/src/routines/level1/xswap.cc @@ -29,7 +29,7 @@ template <> const Precision Xswap<double2>::precision_ = Precision::kComplexDoub // Constructor: forwards to base class constructor template <typename T> -Xswap<T>::Xswap(Queue &queue, Event &event, const std::string &name): +Xswap<T>::Xswap(Queue &queue, EventPointer event, const std::string &name): Routine<T>(queue, event, name, {"Xaxpy"}, precision_) { source_string_ = #include "../../kernels/level1/level1.opencl" @@ -87,13 +87,13 @@ StatusCode Xswap<T>::DoSwap(const size_t n, if (use_fast_kernel) { auto global = std::vector<size_t>{CeilDiv(n, db_["WPT"]*db_["VW"])}; auto local = std::vector<size_t>{db_["WGS"]}; - status = RunKernel(kernel, global, local); + status = RunKernel(kernel, global, local, event_); } else { auto n_ceiled = Ceil(n, db_["WGS"]*db_["WPT"]); auto global = std::vector<size_t>{n_ceiled/db_["WPT"]}; auto local = std::vector<size_t>{db_["WGS"]}; - status = RunKernel(kernel, global, local); + status = RunKernel(kernel, global, local, event_); } if (ErrorIn(status)) { return status; } |