summaryrefslogtreecommitdiff
path: root/src/routines/common.cpp
diff options
context:
space:
mode:
authorCedric Nugteren <web@cedricnugteren.nl>2016-09-13 21:14:51 +0200
committerGitHub <noreply@github.com>2016-09-13 21:14:51 +0200
commitf07ac22f5b57d22756d779d2e53620f988d786ee (patch)
treee8bcbc331683ca6fd807f5a5b83bb05c6e6fed69 /src/routines/common.cpp
parent7c13bacf129291e3e295ecb6e833788477085fa0 (diff)
parent4b94afda941a86f363064ff02f97e21eb9618794 (diff)
Merge pull request #99 from CNugteren/development
Update to version 0.9.0
Diffstat (limited to 'src/routines/common.cpp')
-rw-r--r--src/routines/common.cpp56
1 files changed, 33 insertions, 23 deletions
diff --git a/src/routines/common.cpp b/src/routines/common.cpp
index c378df28..3969cf9f 100644
--- a/src/routines/common.cpp
+++ b/src/routines/common.cpp
@@ -12,6 +12,7 @@
// =================================================================================================
#include <vector>
+#include <chrono>
#include "routines/common.hpp"
@@ -21,45 +22,54 @@ namespace clblast {
// Enqueues a kernel, waits for completion, and checks for errors
StatusCode RunKernel(Kernel &kernel, Queue &queue, const Device &device,
std::vector<size_t> global, const std::vector<size_t> &local,
- EventPointer event, std::vector<Event>& waitForEvents) {
+ EventPointer event, const std::vector<Event> &waitForEvents) {
- // Tests for validity of the local thread sizes
- if (local.size() > device.MaxWorkItemDimensions()) {
- return StatusCode::kInvalidLocalNumDimensions;
- }
- const auto max_work_item_sizes = device.MaxWorkItemSizes();
- for (auto i=size_t{0}; i<local.size(); ++i) {
- if (local[i] > max_work_item_sizes[i]) { return StatusCode::kInvalidLocalThreadsDim; }
- }
- auto local_size = size_t{1};
- for (auto &item: local) { local_size *= item; }
- if (local_size > device.MaxWorkGroupSize()) { return StatusCode::kInvalidLocalThreadsTotal; }
+ if (!local.empty()) {
+ // Tests for validity of the local thread sizes
+ if (local.size() > device.MaxWorkItemDimensions()) {
+ return StatusCode::kInvalidLocalNumDimensions;
+ }
+ const auto max_work_item_sizes = device.MaxWorkItemSizes();
+ for (auto i=size_t{0}; i<local.size(); ++i) {
+ if (local[i] > max_work_item_sizes[i]) { return StatusCode::kInvalidLocalThreadsDim; }
+ }
+ auto local_size = size_t{1};
+ for (auto &item: local) { local_size *= item; }
+ if (local_size > device.MaxWorkGroupSize()) { return StatusCode::kInvalidLocalThreadsTotal; }
- // Make sure the global thread sizes are at least equal to the local sizes
- for (auto i=size_t{0}; i<global.size(); ++i) {
- if (global[i] < local[i]) { global[i] = local[i]; }
+ // Make sure the global thread sizes are at least equal to the local sizes
+ for (auto i=size_t{0}; i<global.size(); ++i) {
+ if (global[i] < local[i]) { global[i] = local[i]; }
+ }
}
// Tests for local memory usage
const auto local_mem_usage = kernel.LocalMemUsage(device);
if (!device.IsLocalMemoryValid(local_mem_usage)) { return StatusCode::kInvalidLocalMemUsage; }
+ // Prints the name of the kernel to launch in case of debugging in verbose mode
+ #ifdef VERBOSE
+ queue.Finish();
+ printf("[DEBUG] Running kernel '%s'\n", kernel.GetFunctionName().c_str());
+ const auto start_time = std::chrono::steady_clock::now();
+ #endif
+
// Launches the kernel (and checks for launch errors)
try {
kernel.Launch(queue, global, local, event, waitForEvents);
} catch (...) { return StatusCode::kKernelLaunchError; }
+ // Prints the elapsed execution time in case of debugging in verbose mode
+ #ifdef VERBOSE
+ queue.Finish();
+ const auto elapsed_time = std::chrono::steady_clock::now() - start_time;
+ const auto timing = std::chrono::duration<double,std::milli>(elapsed_time).count();
+ printf("[DEBUG] Completed kernel in %.2lf ms\n", timing);
+ #endif
+
// No errors, normal termination of this function
return StatusCode::kSuccess;
}
-// As above, but without an event waiting list
-StatusCode RunKernel(Kernel &kernel, Queue &queue, const Device &device,
- std::vector<size_t> global, const std::vector<size_t> &local,
- EventPointer event) {
- auto emptyWaitingList = std::vector<Event>();
- return RunKernel(kernel, queue, device, global, local, event, emptyWaitingList);
-}
-
// =================================================================================================
} // namespace clblast