summary | refs | log | tree | commit | diff
path: root/test/routines/level3/xgemm.hpp
diff options
context:
space:
mode:
Diffstat (limited to 'test/routines/level3/xgemm.hpp')
-rw-r--r-- test/routines/level3/xgemm.hpp | 9
1 file changed, 8 insertions, 1 deletion
diff --git a/test/routines/level3/xgemm.hpp b/test/routines/level3/xgemm.hpp
index 7e0ead6d..1c430c1c 100644
--- a/test/routines/level3/xgemm.hpp
+++ b/test/routines/level3/xgemm.hpp
@@ -22,7 +22,7 @@ namespace clblast {
// =================================================================================================
// See comment at top of file for a description of the class
-template <typename T>
+template <int V, typename T> // 'V' is the version of the kernel (0 for default, 1 for 'in-direct', 2 for 'direct')
class TestXgemm {
public:
@@ -83,6 +83,13 @@ class TestXgemm {
// Describes how to run the CLBlast routine
static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ if (V != 0) {
+ const auto device = queue.GetDevice();
+ const auto switch_threshold = (V == 1) ? size_t{0} : size_t{1024 * 1024 * 1024}; // large enough for tests
+ const auto override_status = OverrideParameters(device(), "KernelSelection", PrecisionValue<T>(),
+ {{"XGEMM_MIN_INDIRECT_SIZE", switch_threshold}});
+ if (override_status != StatusCode::kSuccess) { return override_status; }
+ }
auto queue_plain = queue();
auto event = cl_event{};
auto status = Gemm(args.layout, args.a_transpose, args.b_transpose,