summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Cedric Nugteren <web@cedricnugteren.nl> 2018-05-31 22:37:06 +0200
committer: Cedric Nugteren <web@cedricnugteren.nl> 2018-05-31 22:37:06 +0200
commit: 5702bff5ad579466397f6537dc8925ebd64e3ba3 (patch)
tree: b375ab600530fda9ac42aa0ab908b1f8fcd7042a /src
parent: e609220393654e5400b0acd0d362367f5fe28ab8 (diff)
Added error-checking for half-empty local work group sizes; fixed a minor TRSV global worksize issue
Diffstat (limited to 'src')
-rw-r--r-- src/routines/common.cpp 9
-rw-r--r-- src/routines/level2/xtrsv.cpp 2
-rw-r--r-- src/routines/levelx/xinvert.cpp 6
3 files changed, 13 insertions, 4 deletions
diff --git a/src/routines/common.cpp b/src/routines/common.cpp
index d3c402bd..695785c4 100644
--- a/src/routines/common.cpp
+++ b/src/routines/common.cpp
@@ -13,6 +13,7 @@
#include <vector>
#include <chrono>
+#include <iostream>
#include "routines/common.hpp"
@@ -46,6 +47,14 @@ void RunKernel(Kernel &kernel, Queue &queue, const Device &device,
for (auto i=size_t{0}; i<global.size(); ++i) {
if (global[i] < local[i]) { global[i] = local[i]; }
}
+
+ // Verify that the global thread sizes are a multiple of the local sizes
+ for (auto i=size_t{0}; i<global.size(); ++i) {
+ if ((global[i] / local[i]) * local[i] != global[i]) {
+ throw RuntimeErrorCode(StatusCode::kInvalidLocalThreadsDim,
+ ToString(global[i]) + " is not divisible by " + ToString(local[i]));
+ }
+ }
}
// Tests for local memory usage
diff --git a/src/routines/level2/xtrsv.cpp b/src/routines/level2/xtrsv.cpp
index 5d2ddf54..76401753 100644
--- a/src/routines/level2/xtrsv.cpp
+++ b/src/routines/level2/xtrsv.cpp
@@ -68,7 +68,7 @@ void Xtrsv<T>::Substitution(const Layout layout, const Triangle triangle,
// Launches the kernel
const auto local = std::vector<size_t>{db_["TRSV_BLOCK_SIZE"]};
- const auto global = std::vector<size_t>{1};
+ const auto global = std::vector<size_t>{Ceil(n, db_["TRSV_BLOCK_SIZE"])};
auto event = Event();
RunKernel(kernel, queue_, device_, global, local, event.pointer());
event.WaitForCompletion();
diff --git a/src/routines/levelx/xinvert.cpp b/src/routines/levelx/xinvert.cpp
index 09ef3ec1..99f196ec 100644
--- a/src/routines/levelx/xinvert.cpp
+++ b/src/routines/levelx/xinvert.cpp
@@ -95,11 +95,11 @@ void Xinvert<T>::InvertMatrixDiagonalBlocks(const Layout layout, const Triangle
kernel.SetArgument(5, static_cast<int>(block_size));
kernel.SetArgument(6, static_cast<int>(unit_diagonal));
kernel.SetArgument(7, static_cast<int>(is_upper));
- const auto local = std::vector<size_t>{internal_block_size};
- const auto global = std::vector<size_t>{num_internal_blocks * internal_block_size};
+ const auto local_invert = std::vector<size_t>{internal_block_size};
+ const auto global_invert = std::vector<size_t>{num_internal_blocks * internal_block_size};
auto base_kernel_event = Event();
auto base_kernel_event_pointer = (internal_block_size == block_size) ? event_ : base_kernel_event.pointer();
- RunKernel(kernel, queue_, device_, global, local, base_kernel_event_pointer, event_wait_list);
+ RunKernel(kernel, queue_, device_, global_invert, local_invert, base_kernel_event_pointer, event_wait_list);
if (internal_block_size == block_size) { event_wait_list.push_back(base_kernel_event); }
// Builds up block_size x block_size blocks. For example, internal_block_size=16: