summary refs log tree commit diff
path: root/src/routines/level1
diff options
context:
space:
mode:
author cnugteren <web@cedricnugteren.nl> 2016-04-09 22:22:24 -0600
committer cnugteren <web@cedricnugteren.nl> 2016-04-09 22:22:24 -0600
commit 1d3d38a2618c5663bf1549b08805137fd85f2efa (patch)
tree 5de200346fc3d87c6e353d84744c59b2c703c16d /src/routines/level1
parent c2cfee76c4d8f7486d5b62b3e0a878867a32a070 (diff)
Events are now properly implemented using event waiting list and asking the user to wait for event completion
Diffstat (limited to 'src/routines/level1')
-rw-r--r-- src/routines/level1/xaxpy.cc | 6
-rw-r--r-- src/routines/level1/xcopy.cc | 6
-rw-r--r-- src/routines/level1/xdot.cc | 11
-rw-r--r-- src/routines/level1/xdotc.cc | 2
-rw-r--r-- src/routines/level1/xdotu.cc | 2
-rw-r--r-- src/routines/level1/xnrm2.cc | 10
-rw-r--r-- src/routines/level1/xscal.cc | 6
-rw-r--r-- src/routines/level1/xswap.cc | 6
8 files changed, 29 insertions, 20 deletions
diff --git a/src/routines/level1/xaxpy.cc b/src/routines/level1/xaxpy.cc
index c5acaf49..37d23543 100644
--- a/src/routines/level1/xaxpy.cc
+++ b/src/routines/level1/xaxpy.cc
@@ -29,7 +29,7 @@ template <> const Precision Xaxpy<double2>::precision_ = Precision::kComplexDoub
// Constructor: forwards to base class constructor
template <typename T>
-Xaxpy<T>::Xaxpy(Queue &queue, Event &event, const std::string &name):
+Xaxpy<T>::Xaxpy(Queue &queue, EventPointer event, const std::string &name):
Routine<T>(queue, event, name, {"Xaxpy"}, precision_) {
source_string_ =
#include "../../kernels/level1/level1.opencl"
@@ -89,13 +89,13 @@ StatusCode Xaxpy<T>::DoAxpy(const size_t n, const T alpha,
if (use_fast_kernel) {
auto global = std::vector<size_t>{CeilDiv(n, db_["WPT"]*db_["VW"])};
auto local = std::vector<size_t>{db_["WGS"]};
- status = RunKernel(kernel, global, local);
+ status = RunKernel(kernel, global, local, event_);
}
else {
auto n_ceiled = Ceil(n, db_["WGS"]*db_["WPT"]);
auto global = std::vector<size_t>{n_ceiled/db_["WPT"]};
auto local = std::vector<size_t>{db_["WGS"]};
- status = RunKernel(kernel, global, local);
+ status = RunKernel(kernel, global, local, event_);
}
if (ErrorIn(status)) { return status; }
diff --git a/src/routines/level1/xcopy.cc b/src/routines/level1/xcopy.cc
index 8c7f8671..04508383 100644
--- a/src/routines/level1/xcopy.cc
+++ b/src/routines/level1/xcopy.cc
@@ -29,7 +29,7 @@ template <> const Precision Xcopy<double2>::precision_ = Precision::kComplexDoub
// Constructor: forwards to base class constructor
template <typename T>
-Xcopy<T>::Xcopy(Queue &queue, Event &event, const std::string &name):
+Xcopy<T>::Xcopy(Queue &queue, EventPointer event, const std::string &name):
Routine<T>(queue, event, name, {"Xaxpy"}, precision_) {
source_string_ =
#include "../../kernels/level1/level1.opencl"
@@ -87,13 +87,13 @@ StatusCode Xcopy<T>::DoCopy(const size_t n,
if (use_fast_kernel) {
auto global = std::vector<size_t>{CeilDiv(n, db_["WPT"]*db_["VW"])};
auto local = std::vector<size_t>{db_["WGS"]};
- status = RunKernel(kernel, global, local);
+ status = RunKernel(kernel, global, local, event_);
}
else {
auto n_ceiled = Ceil(n, db_["WGS"]*db_["WPT"]);
auto global = std::vector<size_t>{n_ceiled/db_["WPT"]};
auto local = std::vector<size_t>{db_["WGS"]};
- status = RunKernel(kernel, global, local);
+ status = RunKernel(kernel, global, local, event_);
}
if (ErrorIn(status)) { return status; }
diff --git a/src/routines/level1/xdot.cc b/src/routines/level1/xdot.cc
index e22b0f8b..4813a004 100644
--- a/src/routines/level1/xdot.cc
+++ b/src/routines/level1/xdot.cc
@@ -29,7 +29,7 @@ template <> const Precision Xdot<double2>::precision_ = Precision::kComplexDoubl
// Constructor: forwards to base class constructor
template <typename T>
-Xdot<T>::Xdot(Queue &queue, Event &event, const std::string &name):
+Xdot<T>::Xdot(Queue &queue, EventPointer event, const std::string &name):
Routine<T>(queue, event, name, {"Xdot"}, precision_) {
source_string_ =
#include "../../kernels/level1/xdot.opencl"
@@ -78,11 +78,16 @@ StatusCode Xdot<T>::DoDot(const size_t n,
kernel1.SetArgument(7, temp_buffer());
kernel1.SetArgument(8, static_cast<int>(do_conjugate));
+ // Event waiting list
+ auto eventWaitList = std::vector<Event>();
+
// Launches the main kernel
auto global1 = std::vector<size_t>{db_["WGS1"]*temp_size};
auto local1 = std::vector<size_t>{db_["WGS1"]};
- status = RunKernel(kernel1, global1, local1);
+ auto kernelEvent = Event();
+ status = RunKernel(kernel1, global1, local1, kernelEvent.pointer());
if (ErrorIn(status)) { return status; }
+ eventWaitList.push_back(kernelEvent);
// Sets the arguments for the epilogue kernel
kernel2.SetArgument(0, temp_buffer());
@@ -92,7 +97,7 @@ StatusCode Xdot<T>::DoDot(const size_t n,
// Launches the epilogue kernel
auto global2 = std::vector<size_t>{db_["WGS2"]};
auto local2 = std::vector<size_t>{db_["WGS2"]};
- status = RunKernel(kernel2, global2, local2);
+ status = RunKernel(kernel2, global2, local2, event_, eventWaitList);
if (ErrorIn(status)) { return status; }
// Succesfully finished the computation
diff --git a/src/routines/level1/xdotc.cc b/src/routines/level1/xdotc.cc
index f414f556..b3a01079 100644
--- a/src/routines/level1/xdotc.cc
+++ b/src/routines/level1/xdotc.cc
@@ -21,7 +21,7 @@ namespace clblast {
// Constructor: forwards to base class constructor
template <typename T>
-Xdotc<T>::Xdotc(Queue &queue, Event &event, const std::string &name):
+Xdotc<T>::Xdotc(Queue &queue, EventPointer event, const std::string &name):
Xdot<T>(queue, event, name) {
}
diff --git a/src/routines/level1/xdotu.cc b/src/routines/level1/xdotu.cc
index 28d9b730..8dded6e0 100644
--- a/src/routines/level1/xdotu.cc
+++ b/src/routines/level1/xdotu.cc
@@ -20,7 +20,7 @@ namespace clblast {
// Constructor: forwards to base class constructor
template <typename T>
-Xdotu<T>::Xdotu(Queue &queue, Event &event, const std::string &name):
+Xdotu<T>::Xdotu(Queue &queue, EventPointer event, const std::string &name):
Xdot<T>(queue, event, name) {
}
diff --git a/src/routines/level1/xnrm2.cc b/src/routines/level1/xnrm2.cc
index 685eb29f..04e4137c 100644
--- a/src/routines/level1/xnrm2.cc
+++ b/src/routines/level1/xnrm2.cc
@@ -29,7 +29,7 @@ template <> const Precision Xnrm2<double2>::precision_ = Precision::kComplexDoub
// Constructor: forwards to base class constructor
template <typename T>
-Xnrm2<T>::Xnrm2(Queue &queue, Event &event, const std::string &name):
+Xnrm2<T>::Xnrm2(Queue &queue, EventPointer event, const std::string &name):
Routine<T>(queue, event, name, {"Xdot"}, precision_) {
source_string_ =
#include "../../kernels/level1/xnrm2.opencl"
@@ -69,12 +69,16 @@ StatusCode Xnrm2<T>::DoNrm2(const size_t n,
kernel1.SetArgument(2, static_cast<int>(x_offset));
kernel1.SetArgument(3, static_cast<int>(x_inc));
kernel1.SetArgument(4, temp_buffer());
+ // Event waiting list
+ auto eventWaitList = std::vector<Event>();
// Launches the main kernel
auto global1 = std::vector<size_t>{db_["WGS1"]*temp_size};
auto local1 = std::vector<size_t>{db_["WGS1"]};
- status = RunKernel(kernel1, global1, local1);
+ auto kernelEvent = Event();
+ status = RunKernel(kernel1, global1, local1, kernelEvent.pointer());
if (ErrorIn(status)) { return status; }
+ eventWaitList.push_back(kernelEvent);
// Sets the arguments for the epilogue kernel
kernel2.SetArgument(0, temp_buffer());
@@ -84,7 +88,7 @@ StatusCode Xnrm2<T>::DoNrm2(const size_t n,
// Launches the epilogue kernel
auto global2 = std::vector<size_t>{db_["WGS2"]};
auto local2 = std::vector<size_t>{db_["WGS2"]};
- status = RunKernel(kernel2, global2, local2);
+ status = RunKernel(kernel2, global2, local2, event_, eventWaitList);
if (ErrorIn(status)) { return status; }
// Succesfully finished the computation
diff --git a/src/routines/level1/xscal.cc b/src/routines/level1/xscal.cc
index 57bbe9e8..e83e73fd 100644
--- a/src/routines/level1/xscal.cc
+++ b/src/routines/level1/xscal.cc
@@ -29,7 +29,7 @@ template <> const Precision Xscal<double2>::precision_ = Precision::kComplexDoub
// Constructor: forwards to base class constructor
template <typename T>
-Xscal<T>::Xscal(Queue &queue, Event &event, const std::string &name):
+Xscal<T>::Xscal(Queue &queue, EventPointer event, const std::string &name):
Routine<T>(queue, event, name, {"Xaxpy"}, precision_) {
source_string_ =
#include "../../kernels/level1/level1.opencl"
@@ -81,13 +81,13 @@ StatusCode Xscal<T>::DoScal(const size_t n, const T alpha,
if (use_fast_kernel) {
auto global = std::vector<size_t>{CeilDiv(n, db_["WPT"]*db_["VW"])};
auto local = std::vector<size_t>{db_["WGS"]};
- status = RunKernel(kernel, global, local);
+ status = RunKernel(kernel, global, local, event_);
}
else {
auto n_ceiled = Ceil(n, db_["WGS"]*db_["WPT"]);
auto global = std::vector<size_t>{n_ceiled/db_["WPT"]};
auto local = std::vector<size_t>{db_["WGS"]};
- status = RunKernel(kernel, global, local);
+ status = RunKernel(kernel, global, local, event_);
}
if (ErrorIn(status)) { return status; }
diff --git a/src/routines/level1/xswap.cc b/src/routines/level1/xswap.cc
index c986b3fb..bc425f40 100644
--- a/src/routines/level1/xswap.cc
+++ b/src/routines/level1/xswap.cc
@@ -29,7 +29,7 @@ template <> const Precision Xswap<double2>::precision_ = Precision::kComplexDoub
// Constructor: forwards to base class constructor
template <typename T>
-Xswap<T>::Xswap(Queue &queue, Event &event, const std::string &name):
+Xswap<T>::Xswap(Queue &queue, EventPointer event, const std::string &name):
Routine<T>(queue, event, name, {"Xaxpy"}, precision_) {
source_string_ =
#include "../../kernels/level1/level1.opencl"
@@ -87,13 +87,13 @@ StatusCode Xswap<T>::DoSwap(const size_t n,
if (use_fast_kernel) {
auto global = std::vector<size_t>{CeilDiv(n, db_["WPT"]*db_["VW"])};
auto local = std::vector<size_t>{db_["WGS"]};
- status = RunKernel(kernel, global, local);
+ status = RunKernel(kernel, global, local, event_);
}
else {
auto n_ceiled = Ceil(n, db_["WGS"]*db_["WPT"]);
auto global = std::vector<size_t>{n_ceiled/db_["WPT"]};
auto local = std::vector<size_t>{db_["WGS"]};
- status = RunKernel(kernel, global, local);
+ status = RunKernel(kernel, global, local, event_);
}
if (ErrorIn(status)) { return status; }