summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/tuning/tuning.cpp 22
1 files changed, 16 insertions, 6 deletions
diff --git a/src/tuning/tuning.cpp b/src/tuning/tuning.cpp
index 54ba33da..0fc6112a 100644
--- a/src/tuning/tuning.cpp
+++ b/src/tuning/tuning.cpp
@@ -220,8 +220,13 @@ void Tuner(int argc, char* argv[], const int V,
}
// Sets the thread configuration
- const auto global = settings.global_size_ref;
- const auto local = settings.local_size_ref;
+ auto global = settings.global_size_ref;
+ auto local = settings.local_size_ref;
+
+ // Make sure that the global worksize is a multiple of the local
+ for (auto i=size_t{0}; i<global.size(); ++i) {
+ while ((global[i] / local[i]) * local[i] != global[i]) { global[i]++; }
+ }
printf("%5zu %5zu | %5zu %5zu |", local[0], local[1], global[0], global[1]);
// Compiles the kernel
@@ -269,10 +274,15 @@ void Tuner(int argc, char* argv[], const int V,
}
// Sets the thread configuration
- const auto global = SetThreadConfiguration(configuration, settings.global_size,
- settings.mul_global, settings.div_global);
- const auto local = SetThreadConfiguration(configuration, settings.local_size,
- settings.mul_local, settings.div_local);
+ auto global = SetThreadConfiguration(configuration, settings.global_size,
+ settings.mul_global, settings.div_global);
+ auto local = SetThreadConfiguration(configuration, settings.local_size,
+ settings.mul_local, settings.div_local);
+
+ // Make sure that the global worksize is a multiple of the local
+ for (auto i=size_t{0}; i<global.size(); ++i) {
+ while ((global[i] / local[i]) * local[i] != global[i]) { global[i]++; }
+ }
printf("%5zu %5zu | %5zu %5zu |", local[0], local[1], global[0], global[1]);
// Sets the parameters for this configuration