summary | refs | log | tree | commit | diff
path: root/src/routines
diff options
context:
space:
mode:
author Ivan Shapovalov <intelfx@intelfx.name> 2016-07-11 23:14:43 +0300
committer Ivan Shapovalov <intelfx@intelfx.name> 2016-07-22 11:15:52 +0300
commit ae3299da302ba2c26f1e3490a1a7ee389d91feee (patch)
tree 2dad4c3714a4dfd638da2dcaedf50b6d84bda0c1 /src/routines
parent 5502c5eec4c7b56c3b44ef04046d1621d58be47f (diff)
clblast::RunKernel, cl::Kernel: unify variants with/without waitForEvents, support empty LWS
Diffstat (limited to 'src/routines')
-rw-r--r-- src/routines/common.cpp 38
-rw-r--r-- src/routines/common.hpp 7
2 files changed, 17 insertions, 28 deletions
diff --git a/src/routines/common.cpp b/src/routines/common.cpp
index 21e16954..3969cf9f 100644
--- a/src/routines/common.cpp
+++ b/src/routines/common.cpp
@@ -24,21 +24,23 @@ StatusCode RunKernel(Kernel &kernel, Queue &queue, const Device &device,
std::vector<size_t> global, const std::vector<size_t> &local,
EventPointer event, const std::vector<Event> &waitForEvents) {
- // Tests for validity of the local thread sizes
- if (local.size() > device.MaxWorkItemDimensions()) {
- return StatusCode::kInvalidLocalNumDimensions;
- }
- const auto max_work_item_sizes = device.MaxWorkItemSizes();
- for (auto i=size_t{0}; i<local.size(); ++i) {
- if (local[i] > max_work_item_sizes[i]) { return StatusCode::kInvalidLocalThreadsDim; }
- }
- auto local_size = size_t{1};
- for (auto &item: local) { local_size *= item; }
- if (local_size > device.MaxWorkGroupSize()) { return StatusCode::kInvalidLocalThreadsTotal; }
+ if (!local.empty()) {
+ // Tests for validity of the local thread sizes
+ if (local.size() > device.MaxWorkItemDimensions()) {
+ return StatusCode::kInvalidLocalNumDimensions;
+ }
+ const auto max_work_item_sizes = device.MaxWorkItemSizes();
+ for (auto i=size_t{0}; i<local.size(); ++i) {
+ if (local[i] > max_work_item_sizes[i]) { return StatusCode::kInvalidLocalThreadsDim; }
+ }
+ auto local_size = size_t{1};
+ for (auto &item: local) { local_size *= item; }
+ if (local_size > device.MaxWorkGroupSize()) { return StatusCode::kInvalidLocalThreadsTotal; }
- // Make sure the global thread sizes are at least equal to the local sizes
- for (auto i=size_t{0}; i<global.size(); ++i) {
- if (global[i] < local[i]) { global[i] = local[i]; }
+ // Make sure the global thread sizes are at least equal to the local sizes
+ for (auto i=size_t{0}; i<global.size(); ++i) {
+ if (global[i] < local[i]) { global[i] = local[i]; }
+ }
}
// Tests for local memory usage
@@ -69,13 +71,5 @@ StatusCode RunKernel(Kernel &kernel, Queue &queue, const Device &device,
return StatusCode::kSuccess;
}
-// As above, but without an event waiting list
-StatusCode RunKernel(Kernel &kernel, Queue &queue, const Device &device,
- std::vector<size_t> global, const std::vector<size_t> &local,
- EventPointer event) {
- auto emptyWaitingList = std::vector<Event>();
- return RunKernel(kernel, queue, device, global, local, event, emptyWaitingList);
-}
-
// =================================================================================================
} // namespace clblast
diff --git a/src/routines/common.hpp b/src/routines/common.hpp
index d0bbc707..9d8849c3 100644
--- a/src/routines/common.hpp
+++ b/src/routines/common.hpp
@@ -29,12 +29,7 @@ namespace clblast {
// Enqueues a kernel, waits for completion, and checks for errors
StatusCode RunKernel(Kernel &kernel, Queue &queue, const Device &device,
std::vector<size_t> global, const std::vector<size_t> &local,
- EventPointer event, const std::vector<Event> &waitForEvents);
-
-// As above, but without an event waiting list
-StatusCode RunKernel(Kernel &kernel, Queue &queue, const Device &device,
- std::vector<size_t> global, const std::vector<size_t> &local,
- EventPointer event);
+ EventPointer event, const std::vector<Event> &waitForEvents = {});
// =================================================================================================