From d8827e908cd7ff70e1bf294468c12e76c749317e Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sun, 2 Oct 2016 17:59:05 +0200 Subject: Specialised the GEMM direct kernel in four ways for transposing/non-transposing: NN, NT, TN, TT --- src/tuning/kernels/xgemm_direct.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'src/tuning') diff --git a/src/tuning/kernels/xgemm_direct.cpp b/src/tuning/kernels/xgemm_direct.cpp index c3864348..c6948ef5 100644 --- a/src/tuning/kernels/xgemm_direct.cpp +++ b/src/tuning/kernels/xgemm_direct.cpp @@ -29,7 +29,7 @@ class TuneXgemmDirect { // The representative kernel and the source code static std::string KernelFamily() { return (V==1) ? "xgemm_direct_1" : "xgemm_direct_2"; } - static std::string KernelName() { return "XgemmDirect"; } + static std::string KernelName() { return "XgemmDirectTN"; } static std::string GetSources() { return #include "../src/kernels/common.opencl" @@ -50,8 +50,8 @@ class TuneXgemmDirect { static size_t DefaultM() { return 256; } static size_t DefaultN() { return 256; } static size_t DefaultK() { return 256; } - static double DefaultFraction() { return (V==1) ? 1.0 : 16.0; } // test all or sample randomly - static size_t DefaultNumRuns() { return 10; } // run every kernel this many times for averaging + static double DefaultFraction() { return (V==1) ? 1.0 : 32.0; } // test all or sample randomly + static size_t DefaultNumRuns() { return 4; } // run every kernel this many times for averaging // Describes how to obtain the sizes of the buffers static size_t GetSizeX(const Arguments &) { return 1; } // N/A for this kernel @@ -154,8 +154,6 @@ class TuneXgemmDirect { tuner.AddArgumentOutput(c_mat); tuner.AddArgumentScalar(0); // c_offset tuner.AddArgumentScalar(static_cast(args.n)); // c_ld - tuner.AddArgumentScalar(1); // a_do_transpose - tuner.AddArgumentScalar(0); // b_do_transpose tuner.AddArgumentScalar(1); // c_do_transpose tuner.AddArgumentScalar(0); // a_conjugate tuner.AddArgumentScalar(0); // b_conjugate -- cgit v1.2.3