diff options
Diffstat (limited to 'test/routines/level3')
-rw-r--r-- | test/routines/level3/xgemm.hpp | 39 | ||||
-rw-r--r-- | test/routines/level3/xhemm.hpp | 2 | ||||
-rw-r--r-- | test/routines/level3/xher2k.hpp | 2 | ||||
-rw-r--r-- | test/routines/level3/xherk.hpp | 2 | ||||
-rw-r--r-- | test/routines/level3/xsymm.hpp | 2 | ||||
-rw-r--r-- | test/routines/level3/xsyr2k.hpp | 2 | ||||
-rw-r--r-- | test/routines/level3/xsyrk.hpp | 2 | ||||
-rw-r--r-- | test/routines/level3/xtrmm.hpp | 2 | ||||
-rw-r--r-- | test/routines/level3/xtrsm.hpp | 2 |
9 files changed, 36 insertions, 19 deletions
diff --git a/test/routines/level3/xgemm.hpp b/test/routines/level3/xgemm.hpp index fe8cf7b9..4cfa9c83 100644 --- a/test/routines/level3/xgemm.hpp +++ b/test/routines/level3/xgemm.hpp @@ -37,7 +37,8 @@ class TestXgemm { kArgAOffset, kArgBOffset, kArgCOffset, kArgAlpha, kArgBeta}; } - static std::vector<std::string> BuffersIn() { return {kBufMatA, kBufMatB, kBufMatC}; } + static std::vector<std::string> BuffersIn() { return {kBufMatA, kBufMatB, kBufMatC, + kBufMatAP}; } // used as temp buffer static std::vector<std::string> BuffersOut() { return {kBufMatC}; } // Describes how to obtain the sizes of the buffers @@ -60,10 +61,33 @@ class TestXgemm { } // Describes how to set the sizes of all the buffers - static void SetSizes(Arguments<T> &args) { + static void SetSizes(Arguments<T> &args, Queue &queue) { args.a_size = GetSizeA(args); args.b_size = GetSizeB(args); args.c_size = GetSizeC(args); + + // Optionally (V != 0) enforces indirect (V == 1) or direct (V == 2) kernels + if (V != 0) { + const auto device = queue.GetDevice(); + const auto switch_threshold = (V == 1) ? size_t{0} : size_t{4096}; // large enough for tests + const auto override_status = OverrideParameters(device(), "GemmRoutine", PrecisionValue<T>(), + {{"XGEMM_MIN_INDIRECT_SIZE", switch_threshold}}); + if (override_status != StatusCode::kSuccess) { } + } + + // Sets the size of the temporary buffer (optional argument to GEMM) + auto temp_buffer_size = size_t{0}; + #ifdef OPENCL_API + auto queue_plain = queue(); + GemmTempBufferSize<T>(args.layout, args.a_transpose, args.b_transpose, args.m, args.n, args.k, + args.a_offset, args.a_ld, args.b_offset, args.b_ld, args.c_offset, args.c_ld, + &queue_plain, temp_buffer_size); + #elif CUDA_API + GemmTempBufferSize<T>(args.layout, args.a_transpose, args.b_transpose, args.m, args.n, args.k, + args.a_offset, args.a_ld, args.b_offset, args.b_ld, args.c_offset, args.c_ld, + queue.GetDevice()(), temp_buffer_size); + #endif + args.ap_size = (temp_buffer_size + sizeof(T)) / sizeof(T); // + sizeof(T) to prevent zero } // Describes what the default values of the leading dimensions of the matrices are @@ -83,13 +107,6 @@ class TestXgemm { // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { - if (V != 0) { - const auto device = queue.GetDevice(); - const auto switch_threshold = (V == 1) ? size_t{0} : size_t{1024 * 1024 * 1024}; // large enough for tests - const auto override_status = OverrideParameters(device(), "GemmRoutine", PrecisionValue<T>(), - {{"XGEMM_MIN_INDIRECT_SIZE", switch_threshold}}); - if (override_status != StatusCode::kSuccess) { return override_status; } - } #ifdef OPENCL_API auto queue_plain = queue(); auto event = cl_event{}; @@ -98,7 +115,7 @@ class TestXgemm { buffers.a_mat(), args.a_offset, args.a_ld, buffers.b_mat(), args.b_offset, args.b_ld, args.beta, buffers.c_mat(), args.c_offset, args.c_ld, - &queue_plain, &event); + &queue_plain, &event, buffers.ap_mat()); // temp buffer if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } #elif CUDA_API auto status = Gemm(args.layout, args.a_transpose, args.b_transpose, @@ -106,7 +123,7 @@ class TestXgemm { buffers.a_mat(), args.a_offset, args.a_ld, buffers.b_mat(), args.b_offset, args.b_ld, args.beta, buffers.c_mat(), args.c_offset, args.c_ld, - queue.GetContext()(), queue.GetDevice()()); + queue.GetContext()(), queue.GetDevice()(), buffers.ap_mat()); // temp buffer cuStreamSynchronize(queue()); #endif return status; diff --git a/test/routines/level3/xhemm.hpp b/test/routines/level3/xhemm.hpp index 3b70d3f1..13e685b9 100644 --- a/test/routines/level3/xhemm.hpp +++ b/test/routines/level3/xhemm.hpp @@ -60,7 +60,7 @@ class TestXhemm { } // Describes how to set the sizes of all the buffers - static void SetSizes(Arguments<T> &args) { + static void SetSizes(Arguments<T> &args, Queue&) { args.a_size = GetSizeA(args); args.b_size = GetSizeB(args); args.c_size = GetSizeC(args); diff --git a/test/routines/level3/xher2k.hpp b/test/routines/level3/xher2k.hpp index 6c4e12f1..a8ca4d46 100644 --- a/test/routines/level3/xher2k.hpp +++ b/test/routines/level3/xher2k.hpp @@ -58,7 +58,7 @@ class TestXher2k { } // Describes how to set the sizes of all the buffers - static void SetSizes(Arguments<U> &args) { + static void SetSizes(Arguments<U> &args, Queue&) { args.a_size = GetSizeA(args); args.b_size = GetSizeB(args); args.c_size = GetSizeC(args); diff --git a/test/routines/level3/xherk.hpp b/test/routines/level3/xherk.hpp index c1bb7a0b..3fe14cb2 100644 --- a/test/routines/level3/xherk.hpp +++ b/test/routines/level3/xherk.hpp @@ -52,7 +52,7 @@ class TestXherk { } // Describes how to set the sizes of all the buffers - static void SetSizes(Arguments<U> &args) { + static void SetSizes(Arguments<U> &args, Queue&) { args.a_size = GetSizeA(args); args.c_size = GetSizeC(args); } diff --git a/test/routines/level3/xsymm.hpp b/test/routines/level3/xsymm.hpp index 90cc1888..837e45d8 100644 --- a/test/routines/level3/xsymm.hpp +++ b/test/routines/level3/xsymm.hpp @@ -60,7 +60,7 @@ class TestXsymm { } // Describes how to set the sizes of all the buffers - static void SetSizes(Arguments<T> &args) { + static void SetSizes(Arguments<T> &args, Queue&) { args.a_size = GetSizeA(args); args.b_size = GetSizeB(args); args.c_size = GetSizeC(args); diff --git a/test/routines/level3/xsyr2k.hpp b/test/routines/level3/xsyr2k.hpp index 6b29aff7..bf9f3fbf 100644 --- a/test/routines/level3/xsyr2k.hpp +++ b/test/routines/level3/xsyr2k.hpp @@ -58,7 +58,7 @@ class TestXsyr2k { } // Describes how to set the sizes of all the buffers - static void SetSizes(Arguments<T> &args) { + static void SetSizes(Arguments<T> &args, Queue&) { args.a_size = GetSizeA(args); args.b_size = GetSizeB(args); args.c_size = GetSizeC(args); diff --git a/test/routines/level3/xsyrk.hpp b/test/routines/level3/xsyrk.hpp index b7782176..23dcf12f 100644 --- a/test/routines/level3/xsyrk.hpp +++ b/test/routines/level3/xsyrk.hpp @@ -52,7 +52,7 @@ class TestXsyrk { } // Describes how to set the sizes of all the buffers - static void SetSizes(Arguments<T> &args) { + static void SetSizes(Arguments<T> &args, Queue&) { args.a_size = GetSizeA(args); args.c_size = GetSizeC(args); } diff --git a/test/routines/level3/xtrmm.hpp b/test/routines/level3/xtrmm.hpp index 62d0f573..51377a16 100644 --- a/test/routines/level3/xtrmm.hpp +++ b/test/routines/level3/xtrmm.hpp @@ -52,7 +52,7 @@ class TestXtrmm { } // Describes how to set the sizes of all the buffers - static void SetSizes(Arguments<T> &args) { + static void SetSizes(Arguments<T> &args, Queue&) { args.a_size = GetSizeA(args); args.b_size = GetSizeB(args); } diff --git a/test/routines/level3/xtrsm.hpp b/test/routines/level3/xtrsm.hpp index 9ce1f09c..66c8f415 100644 --- a/test/routines/level3/xtrsm.hpp +++ b/test/routines/level3/xtrsm.hpp @@ -53,7 +53,7 @@ class TestXtrsm { } // Describes how to set the sizes of all the buffers - static void SetSizes(Arguments<T> &args) { + static void SetSizes(Arguments<T> &args, Queue&) { args.a_size = GetSizeA(args); args.b_size = GetSizeB(args); } |