diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2018-05-31 22:37:06 +0200 |
---|---|---|
committer | Cedric Nugteren <web@cedricnugteren.nl> | 2018-05-31 22:37:06 +0200 |
commit | 5702bff5ad579466397f6537dc8925ebd64e3ba3 (patch) | |
tree | b375ab600530fda9ac42aa0ab908b1f8fcd7042a | |
parent | e609220393654e5400b0acd0d362367f5fe28ab8 (diff) |
Added error-checking for half-empty local work group sizes; fixed a minor TRSV global worksize issue
-rw-r--r-- | src/routines/common.cpp | 9 | ||||
-rw-r--r-- | src/routines/level2/xtrsv.cpp | 2 | ||||
-rw-r--r-- | src/routines/levelx/xinvert.cpp | 6 |
3 files changed, 13 insertions, 4 deletions
```diff
diff --git a/src/routines/common.cpp b/src/routines/common.cpp
index d3c402bd..695785c4 100644
--- a/src/routines/common.cpp
+++ b/src/routines/common.cpp
@@ -13,6 +13,7 @@
 
 #include <vector>
 #include <chrono>
+#include <iostream>
 
 #include "routines/common.hpp"
 
@@ -46,6 +47,14 @@ void RunKernel(Kernel &kernel, Queue &queue, const Device &device,
     for (auto i=size_t{0}; i<global.size(); ++i) {
       if (global[i] < local[i]) { global[i] = local[i]; }
     }
+
+    // Verify that the global thread sizes are a multiple of the local sizes
+    for (auto i=size_t{0}; i<global.size(); ++i) {
+      if ((global[i] / local[i]) * local[i] != global[i]) {
+        throw RuntimeErrorCode(StatusCode::kInvalidLocalThreadsDim,
+                               ToString(global[i]) + " is not divisible by " + ToString(local[i]));
+      }
+    }
   }
 
   // Tests for local memory usage
diff --git a/src/routines/level2/xtrsv.cpp b/src/routines/level2/xtrsv.cpp
index 5d2ddf54..76401753 100644
--- a/src/routines/level2/xtrsv.cpp
+++ b/src/routines/level2/xtrsv.cpp
@@ -68,7 +68,7 @@ void Xtrsv<T>::Substitution(const Layout layout, const Triangle triangle,
 
   // Launches the kernel
   const auto local = std::vector<size_t>{db_["TRSV_BLOCK_SIZE"]};
-  const auto global = std::vector<size_t>{1};
+  const auto global = std::vector<size_t>{Ceil(n, db_["TRSV_BLOCK_SIZE"])};
   auto event = Event();
   RunKernel(kernel, queue_, device_, global, local, event.pointer());
   event.WaitForCompletion();
diff --git a/src/routines/levelx/xinvert.cpp b/src/routines/levelx/xinvert.cpp
index 09ef3ec1..99f196ec 100644
--- a/src/routines/levelx/xinvert.cpp
+++ b/src/routines/levelx/xinvert.cpp
@@ -95,11 +95,11 @@ void Xinvert<T>::InvertMatrixDiagonalBlocks(const Layout layout, const Triangle
   kernel.SetArgument(5, static_cast<int>(block_size));
   kernel.SetArgument(6, static_cast<int>(unit_diagonal));
   kernel.SetArgument(7, static_cast<int>(is_upper));
-  const auto local = std::vector<size_t>{internal_block_size};
-  const auto global = std::vector<size_t>{num_internal_blocks * internal_block_size};
+  const auto local_invert = std::vector<size_t>{internal_block_size};
+  const auto global_invert = std::vector<size_t>{num_internal_blocks * internal_block_size};
   auto base_kernel_event = Event();
   auto base_kernel_event_pointer = (internal_block_size == block_size) ? event_ : base_kernel_event.pointer();
-  RunKernel(kernel, queue_, device_, global, local, base_kernel_event_pointer, event_wait_list);
+  RunKernel(kernel, queue_, device_, global_invert, local_invert, base_kernel_event_pointer, event_wait_list);
   if (internal_block_size == block_size) { event_wait_list.push_back(base_kernel_event); }
 
   // Builds up block_size x block_size blocks. For example, internal_block_size=16:
```