summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- src/clpp11.hpp 13
-rw-r--r-- src/routines/common.cpp 38
-rw-r--r-- src/routines/common.hpp 7
3 files changed, 19 insertions, 39 deletions
diff --git a/src/clpp11.hpp b/src/clpp11.hpp
index debfff09..d57223dd 100644
--- a/src/clpp11.hpp
+++ b/src/clpp11.hpp
@@ -695,23 +695,14 @@ class Kernel {
if (waitEvent()) { waitForEventsPlain.push_back(waitEvent()); }
}
- if (waitForEvents.size() == 0) { return Launch(queue, global, local, event); }
-
// Launches the kernel while waiting for other events
CheckError(clEnqueueNDRangeKernel(queue(), *kernel_, static_cast<cl_uint>(global.size()),
- nullptr, global.data(), local.data(),
+ nullptr, global.data(), !local.empty() ? local.data() : nullptr,
static_cast<cl_uint>(waitForEventsPlain.size()),
- waitForEventsPlain.data(),
+ !waitForEventsPlain.empty() ? waitForEventsPlain.data() : nullptr,
event));
}
- // As above, but with the default local workgroup size
- void Launch(const Queue &queue, const std::vector<size_t> &global, EventPointer event) {
- CheckError(clEnqueueNDRangeKernel(queue(), *kernel_, static_cast<cl_uint>(global.size()),
- nullptr, global.data(), nullptr,
- 0, nullptr, event));
- }
-
// Accessor to the private data-member
const cl_kernel& operator()() const { return *kernel_; }
private:
diff --git a/src/routines/common.cpp b/src/routines/common.cpp
index 21e16954..3969cf9f 100644
--- a/src/routines/common.cpp
+++ b/src/routines/common.cpp
@@ -24,21 +24,23 @@ StatusCode RunKernel(Kernel &kernel, Queue &queue, const Device &device,
std::vector<size_t> global, const std::vector<size_t> &local,
EventPointer event, const std::vector<Event> &waitForEvents) {
- // Tests for validity of the local thread sizes
- if (local.size() > device.MaxWorkItemDimensions()) {
- return StatusCode::kInvalidLocalNumDimensions;
- }
- const auto max_work_item_sizes = device.MaxWorkItemSizes();
- for (auto i=size_t{0}; i<local.size(); ++i) {
- if (local[i] > max_work_item_sizes[i]) { return StatusCode::kInvalidLocalThreadsDim; }
- }
- auto local_size = size_t{1};
- for (auto &item: local) { local_size *= item; }
- if (local_size > device.MaxWorkGroupSize()) { return StatusCode::kInvalidLocalThreadsTotal; }
+ if (!local.empty()) {
+ // Tests for validity of the local thread sizes
+ if (local.size() > device.MaxWorkItemDimensions()) {
+ return StatusCode::kInvalidLocalNumDimensions;
+ }
+ const auto max_work_item_sizes = device.MaxWorkItemSizes();
+ for (auto i=size_t{0}; i<local.size(); ++i) {
+ if (local[i] > max_work_item_sizes[i]) { return StatusCode::kInvalidLocalThreadsDim; }
+ }
+ auto local_size = size_t{1};
+ for (auto &item: local) { local_size *= item; }
+ if (local_size > device.MaxWorkGroupSize()) { return StatusCode::kInvalidLocalThreadsTotal; }
- // Make sure the global thread sizes are at least equal to the local sizes
- for (auto i=size_t{0}; i<global.size(); ++i) {
- if (global[i] < local[i]) { global[i] = local[i]; }
+ // Make sure the global thread sizes are at least equal to the local sizes
+ for (auto i=size_t{0}; i<global.size(); ++i) {
+ if (global[i] < local[i]) { global[i] = local[i]; }
+ }
}
// Tests for local memory usage
@@ -69,13 +71,5 @@ StatusCode RunKernel(Kernel &kernel, Queue &queue, const Device &device,
return StatusCode::kSuccess;
}
-// As above, but without an event waiting list
-StatusCode RunKernel(Kernel &kernel, Queue &queue, const Device &device,
- std::vector<size_t> global, const std::vector<size_t> &local,
- EventPointer event) {
- auto emptyWaitingList = std::vector<Event>();
- return RunKernel(kernel, queue, device, global, local, event, emptyWaitingList);
-}
-
// =================================================================================================
} // namespace clblast
diff --git a/src/routines/common.hpp b/src/routines/common.hpp
index d0bbc707..9d8849c3 100644
--- a/src/routines/common.hpp
+++ b/src/routines/common.hpp
@@ -29,12 +29,7 @@ namespace clblast {
// Enqueues a kernel, waits for completion, and checks for errors
StatusCode RunKernel(Kernel &kernel, Queue &queue, const Device &device,
std::vector<size_t> global, const std::vector<size_t> &local,
- EventPointer event, const std::vector<Event> &waitForEvents);
-
-// As above, but without an event waiting list
-StatusCode RunKernel(Kernel &kernel, Queue &queue, const Device &device,
- std::vector<size_t> global, const std::vector<size_t> &local,
- EventPointer event);
+ EventPointer event, const std::vector<Event> &waitForEvents = {});
// =================================================================================================