summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author Cedric Nugteren <web@cedricnugteren.nl> 2020-05-10 20:28:23 +0200
committer Cedric Nugteren <web@cedricnugteren.nl> 2020-05-10 20:28:23 +0200
commit 4a6c7c37a34692a77d2cdce770898ce4cf319295 (patch)
tree 26e531bdb69b750ebd43f7b13ecbad6e1680d278 /src
parent 69a4b4d4b08c425b7ede4747a04af39d539d0a7d (diff)
Made sure that the global workgroup size is a multiple of the local size in the tuners
Diffstat (limited to 'src')
-rw-r--r-- src/tuning/tuning.cpp 22
1 files changed, 16 insertions, 6 deletions
diff --git a/src/tuning/tuning.cpp b/src/tuning/tuning.cpp
index 54ba33da..0fc6112a 100644
--- a/src/tuning/tuning.cpp
+++ b/src/tuning/tuning.cpp
@@ -220,8 +220,13 @@ void Tuner(int argc, char* argv[], const int V,
}
// Sets the thread configuration
- const auto global = settings.global_size_ref;
- const auto local = settings.local_size_ref;
+ auto global = settings.global_size_ref;
+ auto local = settings.local_size_ref;
+
+ // Make sure that the global worksize is a multiple of the local
+ for (auto i=size_t{0}; i<global.size(); ++i) {
+ while ((global[i] / local[i]) * local[i] != global[i]) { global[i]++; }
+ }
printf("%5zu %5zu | %5zu %5zu |", local[0], local[1], global[0], global[1]);
// Compiles the kernel
@@ -269,10 +274,15 @@ void Tuner(int argc, char* argv[], const int V,
}
// Sets the thread configuration
- const auto global = SetThreadConfiguration(configuration, settings.global_size,
- settings.mul_global, settings.div_global);
- const auto local = SetThreadConfiguration(configuration, settings.local_size,
- settings.mul_local, settings.div_local);
+ auto global = SetThreadConfiguration(configuration, settings.global_size,
+ settings.mul_global, settings.div_global);
+ auto local = SetThreadConfiguration(configuration, settings.local_size,
+ settings.mul_local, settings.div_local);
+
+ // Make sure that the global worksize is a multiple of the local
+ for (auto i=size_t{0}; i<global.size(); ++i) {
+ while ((global[i] / local[i]) * local[i] != global[i]) { global[i]++; }
+ }
printf("%5zu %5zu | %5zu %5zu |", local[0], local[1], global[0], global[1]);
// Sets the parameters for this configuration