diff options
| | | |
|---|---|---|
| author | Cedric Nugteren <web@cedricnugteren.nl> | 2016-10-02 17:59:05 +0200 |
| committer | Cedric Nugteren <web@cedricnugteren.nl> | 2016-10-02 17:59:05 +0200 |
| commit | d8827e908cd7ff70e1bf294468c12e76c749317e (patch) | |
| tree | 9122743c2e1b4c2d122d76805b3fd4163c500d7c /src/tuning/kernels | |
| parent | 61f489e370c56075e166caff6d1ad671ca6787b9 (diff) | |
Specialised the GEMM direct kernel in four ways for transposing/non-transposing: NN, NT, TN, TT
Diffstat (limited to 'src/tuning/kernels')
-rw-r--r-- | src/tuning/kernels/xgemm_direct.cpp | 8 |
1 files changed, 3 insertions, 5 deletions
```diff
diff --git a/src/tuning/kernels/xgemm_direct.cpp b/src/tuning/kernels/xgemm_direct.cpp
index c3864348..c6948ef5 100644
--- a/src/tuning/kernels/xgemm_direct.cpp
+++ b/src/tuning/kernels/xgemm_direct.cpp
@@ -29,7 +29,7 @@ class TuneXgemmDirect {
 
   // The representative kernel and the source code
   static std::string KernelFamily() { return (V==1) ? "xgemm_direct_1" : "xgemm_direct_2"; }
-  static std::string KernelName() { return "XgemmDirect"; }
+  static std::string KernelName() { return "XgemmDirectTN"; }
   static std::string GetSources() {
     return
       #include "../src/kernels/common.opencl"
@@ -50,8 +50,8 @@ class TuneXgemmDirect {
   static size_t DefaultM() { return 256; }
   static size_t DefaultN() { return 256; }
   static size_t DefaultK() { return 256; }
-  static double DefaultFraction() { return (V==1) ? 1.0 : 16.0; } // test all or sample randomly
-  static size_t DefaultNumRuns() { return 10; } // run every kernel this many times for averaging
+  static double DefaultFraction() { return (V==1) ? 1.0 : 32.0; } // test all or sample randomly
+  static size_t DefaultNumRuns() { return 4; } // run every kernel this many times for averaging
 
   // Describes how to obtain the sizes of the buffers
   static size_t GetSizeX(const Arguments<T> &) { return 1; } // N/A for this kernel
@@ -154,8 +154,6 @@ class TuneXgemmDirect {
     tuner.AddArgumentOutput(c_mat);
     tuner.AddArgumentScalar(0); // c_offset
     tuner.AddArgumentScalar(static_cast<int>(args.n)); // c_ld
-    tuner.AddArgumentScalar(1); // a_do_transpose
-    tuner.AddArgumentScalar(0); // b_do_transpose
     tuner.AddArgumentScalar(1); // c_do_transpose
     tuner.AddArgumentScalar(0); // a_conjugate
     tuner.AddArgumentScalar(0); // b_conjugate
```