diff options
Diffstat (limited to 'test/routines/level3/xgemm.hpp')
-rw-r--r-- | test/routines/level3/xgemm.hpp | 9 |
1 file changed, 8 insertions, 1 deletion
diff --git a/test/routines/level3/xgemm.hpp b/test/routines/level3/xgemm.hpp index 7e0ead6d..1c430c1c 100644 --- a/test/routines/level3/xgemm.hpp +++ b/test/routines/level3/xgemm.hpp @@ -22,7 +22,7 @@ namespace clblast { // ================================================================================================= // See comment at top of file for a description of the class -template <typename T> +template <int V, typename T> // 'V' is the version of the kernel (0 for default, 1 for 'in-direct', 2 for 'direct') class TestXgemm { public: @@ -83,6 +83,13 @@ class TestXgemm { // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { + if (V != 0) { + const auto device = queue.GetDevice(); + const auto switch_threshold = (V == 1) ? size_t{0} : size_t{1024 * 1024 * 1024}; // large enough for tests + const auto override_status = OverrideParameters(device(), "KernelSelection", PrecisionValue<T>(), + {{"XGEMM_MIN_INDIRECT_SIZE", switch_threshold}}); + if (override_status != StatusCode::kSuccess) { return override_status; } + } auto queue_plain = queue(); auto event = cl_event{}; auto status = Gemm(args.layout, args.a_transpose, args.b_transpose, |