From 669f43aed65ccd4aae9c4a478e994660f3e2a592 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sun, 25 Sep 2016 13:52:08 +0200 Subject: Separated the tuning parameters of the new direct GEMM kernel from the indirect version --- src/database/kernels/xgemm_direct.hpp | 76 +++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) create mode 100644 src/database/kernels/xgemm_direct.hpp (limited to 'src/database/kernels') diff --git a/src/database/kernels/xgemm_direct.hpp b/src/database/kernels/xgemm_direct.hpp new file mode 100644 index 00000000..76055ef2 --- /dev/null +++ b/src/database/kernels/xgemm_direct.hpp @@ -0,0 +1,76 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Database generator +// +// This file populates the database with best-found tuning parameters for the 'Xgemm' kernels. +// +// ================================================================================================= + +namespace clblast { +// ================================================================================================= + +const Database::DatabaseEntry Database::XgemmDirectHalf = { + "XgemmDirect", Precision::kHalf, { + { // Default + kDeviceTypeAll, "default", { + { "default", { {"KWGD",32}, {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"MWGD",32}, {"NDIMBD",8}, {"NDIMCD",8}, {"NWGD",32}, {"VWMD",1}, {"VWND",1} } }, + } + }, + } +}; + +// ================================================================================================= + +const Database::DatabaseEntry Database::XgemmDirectSingle = { + "XgemmDirect", Precision::kSingle, { + { // Default + kDeviceTypeAll, "default", { + { "default", { {"KWGD",32}, {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"MWGD",32}, {"NDIMBD",8}, {"NDIMCD",8}, {"NWGD",32}, {"VWMD",1}, {"VWND",1} } }, + } + }, + } +}; + +// ================================================================================================= + +const Database::DatabaseEntry Database::XgemmDirectComplexSingle = { + "XgemmDirect", Precision::kComplexSingle, { + { // Default + kDeviceTypeAll, "default", { + { "default", { {"KWGD",32}, {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"MWGD",32}, {"NDIMBD",8}, {"NDIMCD",8}, {"NWGD",32}, {"VWMD",1}, {"VWND",1} } }, + } + }, + } +}; + +// ================================================================================================= + +const Database::DatabaseEntry Database::XgemmDirectDouble = { + "XgemmDirect", Precision::kDouble, { + { // Default + kDeviceTypeAll, "default", { + { "default", { {"KWGD",32}, {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"MWGD",32}, {"NDIMBD",8}, {"NDIMCD",8}, {"NWGD",32}, {"VWMD",1}, {"VWND",1} } }, + } + }, + } +}; + +// ================================================================================================= + +const Database::DatabaseEntry Database::XgemmDirectComplexDouble = { + "XgemmDirect", Precision::kComplexDouble, { + { // Default + kDeviceTypeAll, "default", { + { "default", { {"KWGD",32}, {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"MWGD",32}, {"NDIMBD",8}, {"NDIMCD",8}, {"NWGD",32}, {"VWMD",1}, {"VWND",1} } }, + } + }, + } +}; + +// ================================================================================================= +} // namespace clblast -- cgit v1.2.3 From 73d135c2cef9763b47d410b125eb8bb89ece8432 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sun, 25 Sep 2016 14:48:34 +0200 Subject: Added a first version of a tuner for the GEMM direct kernel; collapsed MWGD, NWGD and KWGD into one WGD parameter --- CMakeLists.txt | 3 +- src/database/kernels/xgemm_direct.hpp | 10 +- src/kernels/level3/xgemm_direct.opencl | 186 ++++++++++++++++---------------- src/routines/level3/xgemm.cpp | 8 +- src/tuning/kernels/xgemm_direct.cpp | 191 +++++++++++++++++++++++++++++++++ 5 files changed, 292 insertions(+), 106 deletions(-) create mode 100644 src/tuning/kernels/xgemm_direct.cpp (limited to 'src/database/kernels') diff --git a/CMakeLists.txt b/CMakeLists.txt index 178ac9bb..e90fdc56 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -134,7 +134,8 @@ endif() # ================================================================================================== # Sets the supported routines and the used kernels. New routines and kernels should be added here. -set(KERNELS copy_fast copy_pad transpose_fast transpose_pad xaxpy xdot xger xgemm xgemv) +set(KERNELS copy_fast copy_pad transpose_fast transpose_pad xaxpy xdot xger + xgemm xgemm_direct xgemv) set(SAMPLE_PROGRAMS_CPP sgemm) set(SAMPLE_PROGRAMS_C sasum dgemv sgemm haxpy cache) set(LEVEL1_ROUTINES xswap xscal xcopy xaxpy xdot xdotu xdotc xnrm2 xasum xamax) diff --git a/src/database/kernels/xgemm_direct.hpp b/src/database/kernels/xgemm_direct.hpp index 76055ef2..dc69f61b 100644 --- a/src/database/kernels/xgemm_direct.hpp +++ b/src/database/kernels/xgemm_direct.hpp @@ -18,7 +18,7 @@ const Database::DatabaseEntry Database::XgemmDirectHalf = { "XgemmDirect", Precision::kHalf, { { // Default kDeviceTypeAll, "default", { - { "default", { {"KWGD",32}, {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"MWGD",32}, {"NDIMBD",8}, {"NDIMCD",8}, {"NWGD",32}, {"VWMD",1}, {"VWND",1} } }, + { "default", { {"WGD",32}, {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",8}, {"NDIMCD",8}, {"VWMD",1}, {"VWND",1} } }, } }, } @@ -30,7 +30,7 @@ const Database::DatabaseEntry Database::XgemmDirectSingle = { "XgemmDirect", Precision::kSingle, { { // Default kDeviceTypeAll, "default", { - { "default", { {"KWGD",32}, {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"MWGD",32}, {"NDIMBD",8}, {"NDIMCD",8}, {"NWGD",32}, {"VWMD",1}, {"VWND",1} } }, + { "default", { {"WGD",32}, {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",8}, {"NDIMCD",8}, {"VWMD",1}, {"VWND",1} } }, } }, } @@ -42,7 +42,7 @@ const Database::DatabaseEntry Database::XgemmDirectComplexSingle = { "XgemmDirect", Precision::kComplexSingle, { { // Default kDeviceTypeAll, "default", { - { "default", { {"KWGD",32}, {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"MWGD",32}, {"NDIMBD",8}, {"NDIMCD",8}, {"NWGD",32}, {"VWMD",1}, {"VWND",1} } }, + { "default", { {"WGD",32}, {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",8}, {"NDIMCD",8}, {"VWMD",1}, {"VWND",1} } }, } }, } @@ -54,7 +54,7 @@ const Database::DatabaseEntry Database::XgemmDirectDouble = { "XgemmDirect", Precision::kDouble, { { // Default kDeviceTypeAll, "default", { - { "default", { {"KWGD",32}, {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"MWGD",32}, {"NDIMBD",8}, {"NDIMCD",8}, {"NWGD",32}, {"VWMD",1}, {"VWND",1} } }, + { "default", { {"WGD",32}, {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",8}, {"NDIMCD",8}, {"VWMD",1}, {"VWND",1} } }, } }, } @@ -66,7 +66,7 @@ const Database::DatabaseEntry Database::XgemmDirectComplexDouble = { "XgemmDirect", Precision::kComplexDouble, { { // Default kDeviceTypeAll, "default", { - { "default", { {"KWGD",32}, {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"MWGD",32}, {"NDIMBD",8}, {"NDIMCD",8}, {"NWGD",32}, {"VWMD",1}, {"VWND",1} } }, + { "default", { {"WGD",32}, {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",8}, {"NDIMCD",8}, {"VWMD",1}, {"VWND",1} } }, } }, } diff --git a/src/kernels/level3/xgemm_direct.opencl b/src/kernels/level3/xgemm_direct.opencl index 801887dd..705ced9c 100644 --- a/src/kernels/level3/xgemm_direct.opencl +++ b/src/kernels/level3/xgemm_direct.opencl @@ -19,14 +19,8 @@ R"( // Parameters set by the tuner or by the database. Here they are given a basic default value in case // this kernel file is used outside of the CLBlast library. Note that all parameters here have a // suffix 'D' to denote that they are for the 'direct' version of the GEMM kernel. -#ifndef MWGD - #define MWGD 8 // Tile-size in dimension M (e.g. 64, 128) -#endif -#ifndef NWGD - #define NWGD 8 // Tile-size in dimension N (e.g. 64, 128) -#endif -#ifndef KWGD - #define KWGD 8 // Tile-size in dimension K (e.g. 8, 16) +#ifndef WGD + #define WGD 8 // Tile-size in dimension M, N, and K (e.g. 8, 16, 32, 64) #endif #ifndef MDIMCD #define MDIMCD 8 // Threads per workgroup in M-dimension (e.g. 8, 16, 32) @@ -41,7 +35,7 @@ R"( #define NDIMBD 8 // Re-shaped tile dimension of matrix B: KDIMBD * NDIMBD #endif #ifndef KWID - #define KWID 1 // Unroll factor of the KWGD loop (smaller or equal than KWGD) + #define KWID 1 // Unroll factor of the WGD loop (smaller or equal than WGD) #endif #ifndef VWMD #define VWMD 1 // Vector width of matrices A and C @@ -51,14 +45,14 @@ R"( #endif // Helper parameters based on the above tuning parameters -#define MWID (MWGD/MDIMCD) // Work per work-item (M-dimension) -#define NWID (NWGD/NDIMCD) // Work per work-item (N-dimension) +#define MWID (WGD/MDIMCD) // Work per work-item (M-dimension) +#define NWID (WGD/NDIMCD) // Work per work-item (N-dimension) #define KDIMAD ((MDIMCD*NDIMCD)/(MDIMAD)) // Re-shaped tile dimension of matrix A: KDIMAD * MDIMAD #define KDIMBD ((MDIMCD*NDIMCD)/(NDIMBD)) // Re-shaped tile dimension of matrix B: KDIMBD * NDIMBD -#define MWAD (MWGD/MDIMAD) // Amount of loads-per-thread for matrix A (M-dimension) -#define KWAD (KWGD/KDIMAD) // Amount of loads-per-thread for matrix A (K-dimension) -#define KWBD (KWGD/KDIMBD) // Amount of loads-per-thread for matrix B (K-dimension) -#define NWBD (NWGD/NDIMBD) // Amount of loads-per-thread for matrix B (N-dimension) +#define MWAD (WGD/MDIMAD) // Amount of loads-per-thread for matrix A (M-dimension) +#define KWAD (WGD/KDIMAD) // Amount of loads-per-thread for matrix A (K-dimension) +#define KWBD (WGD/KDIMBD) // Amount of loads-per-thread for matrix B (K-dimension) +#define NWBD (WGD/NDIMBD) // Amount of loads-per-thread for matrix B (N-dimension) // ================================================================================================= @@ -105,51 +99,51 @@ inline void GlobalToLocalDirectA(const __global realMD* restrict agm, __local re // Computes the indices for the global memory int mg = mia + la0*(MWAD/VWMD); int kg = kia + la1*KWAD; - int idm = (a_transpose) ? mg + kwg/VWMD : mg + GetGroupID0()*(MWGD/VWMD); - int idk = (a_transpose) ? kg + GetGroupID0()*MWGD : kg + kwg; + int idm = (a_transpose) ? mg + kwg/VWMD : mg + GetGroupID0()*(WGD/VWMD); + int idk = (a_transpose) ? kg + GetGroupID0()*WGD : kg + kwg; // Loads the data from global memory into the local memory const realMD avec = agm[idk*(a_ld/VWMD) + idm + a_offset]; #if VWMD == 1 - alm[kg*MWGD + mg] = avec; + alm[kg*WGD + mg] = avec; #elif VWMD == 2 - alm[kg*MWGD + mg*VWMD + 0] = avec.x; - alm[kg*MWGD + mg*VWMD + 1] = avec.y; + alm[kg*WGD + mg*VWMD + 0] = avec.x; + alm[kg*WGD + mg*VWMD + 1] = avec.y; #elif VWMD == 4 - alm[kg*MWGD + mg*VWMD + 0] = avec.x; - alm[kg*MWGD + mg*VWMD + 1] = avec.y; - alm[kg*MWGD + mg*VWMD + 2] = avec.z; - alm[kg*MWGD + mg*VWMD + 3] = avec.w; + alm[kg*WGD + mg*VWMD + 0] = avec.x; + alm[kg*WGD + mg*VWMD + 1] = avec.y; + alm[kg*WGD + mg*VWMD + 2] = avec.z; + alm[kg*WGD + mg*VWMD + 3] = avec.w; #elif VWMD == 8 - alm[kg*MWGD + mg*VWMD + 0] = avec.s0; - alm[kg*MWGD + mg*VWMD + 1] = avec.s1; - alm[kg*MWGD + mg*VWMD + 2] = avec.s2; - alm[kg*MWGD + mg*VWMD + 3] = avec.s3; - alm[kg*MWGD + mg*VWMD + 4] = avec.s4; - alm[kg*MWGD + mg*VWMD + 5] = avec.s5; - alm[kg*MWGD + mg*VWMD + 6] = avec.s6; - alm[kg*MWGD + mg*VWMD + 7] = avec.s7; + alm[kg*WGD + mg*VWMD + 0] = avec.s0; + alm[kg*WGD + mg*VWMD + 1] = avec.s1; + alm[kg*WGD + mg*VWMD + 2] = avec.s2; + alm[kg*WGD + mg*VWMD + 3] = avec.s3; + alm[kg*WGD + mg*VWMD + 4] = avec.s4; + alm[kg*WGD + mg*VWMD + 5] = avec.s5; + alm[kg*WGD + mg*VWMD + 6] = avec.s6; + alm[kg*WGD + mg*VWMD + 7] = avec.s7; #elif VWMD == 16 - alm[kg*MWGD + mg*VWMD + 0] = avec.s0; - alm[kg*MWGD + mg*VWMD + 1] = avec.s1; - alm[kg*MWGD + mg*VWMD + 2] = avec.s2; - alm[kg*MWGD + mg*VWMD + 3] = avec.s3; - alm[kg*MWGD + mg*VWMD + 4] = avec.s4; - alm[kg*MWGD + mg*VWMD + 5] = avec.s5; - alm[kg*MWGD + mg*VWMD + 6] = avec.s6; - alm[kg*MWGD + mg*VWMD + 7] = avec.s7; - alm[kg*MWGD + mg*VWMD + 8] = avec.s8; - alm[kg*MWGD + mg*VWMD + 9] = avec.s9; - alm[kg*MWGD + mg*VWMD + 10] = avec.sA; - alm[kg*MWGD + mg*VWMD + 11] = avec.sB; - alm[kg*MWGD + mg*VWMD + 12] = avec.sC; - alm[kg*MWGD + mg*VWMD + 13] = avec.sD; - alm[kg*MWGD + mg*VWMD + 14] = avec.sE; - alm[kg*MWGD + mg*VWMD + 15] = avec.sF; + alm[kg*WGD + mg*VWMD + 0] = avec.s0; + alm[kg*WGD + mg*VWMD + 1] = avec.s1; + alm[kg*WGD + mg*VWMD + 2] = avec.s2; + alm[kg*WGD + mg*VWMD + 3] = avec.s3; + alm[kg*WGD + mg*VWMD + 4] = avec.s4; + alm[kg*WGD + mg*VWMD + 5] = avec.s5; + alm[kg*WGD + mg*VWMD + 6] = avec.s6; + alm[kg*WGD + mg*VWMD + 7] = avec.s7; + alm[kg*WGD + mg*VWMD + 8] = avec.s8; + alm[kg*WGD + mg*VWMD + 9] = avec.s9; + alm[kg*WGD + mg*VWMD + 10] = avec.sA; + alm[kg*WGD + mg*VWMD + 11] = avec.sB; + alm[kg*WGD + mg*VWMD + 12] = avec.sC; + alm[kg*WGD + mg*VWMD + 13] = avec.sD; + alm[kg*WGD + mg*VWMD + 14] = avec.sE; + alm[kg*WGD + mg*VWMD + 15] = avec.sF; #endif if (a_conjugate) { for (int vm=0; vm local (matrix A and B) GlobalToLocalDirectA(agm, alm, a_ld, a_offset, tid, kwg, a_transpose, a_conjugate); @@ -351,7 +345,7 @@ __kernel void XgemmDirect(const int kSizeM, const int kSizeN, const int kSizeK, barrier(CLK_LOCAL_MEM_FENCE); // Loops over all workitem tiles, unrolled by a factor KWID - for (int pwi=0; pwi::GemmDirect(const size_t m, const size_t n, const size_t k, kernel.SetArgument(18, static_cast(b_conjugate)); // Computes the global and local thread sizes - const auto m_ceiled = Ceil(m, db_["MWGD"]); - const auto n_ceiled = Ceil(n, db_["NWGD"]); + const auto m_ceiled = Ceil(m, db_["WGD"]); + const auto n_ceiled = Ceil(n, db_["WGD"]); const auto global = std::vector{ - (m_ceiled * db_["MDIMCD"]) / db_["MWGD"], - (n_ceiled * db_["NDIMCD"]) / db_["NWGD"] + (m_ceiled * db_["MDIMCD"]) / db_["WGD"], + (n_ceiled * db_["NDIMCD"]) / db_["WGD"] }; const auto local = std::vector{db_["MDIMCD"], db_["NDIMCD"]}; diff --git a/src/tuning/kernels/xgemm_direct.cpp b/src/tuning/kernels/xgemm_direct.cpp new file mode 100644 index 00000000..c2e8710f --- /dev/null +++ b/src/tuning/kernels/xgemm_direct.cpp @@ -0,0 +1,191 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file uses the CLTune auto-tuner to tune the direct xgemm kernels. There are two variations: +// - V==1: This tests some limited set of tuning parameters exhaustively. +// - V==2: This tests a much larger set of tuning parameters by randomly sampling a subset. +// +// ================================================================================================= + +#include +#include + +#include "utilities.hpp" +#include "tuning/tuning.hpp" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template +class TuneXgemmDirect { + public: + + // The representative kernel and the source code + static std::string KernelFamily() { return (V==1) ? "xgemm_direct_1" : "xgemm_direct_2"; } + static std::string KernelName() { return "XgemmDirect"; } + static std::string GetSources() { + return + #include "../src/kernels/common.opencl" + #include "../src/kernels/level3/xgemm_direct.opencl" + ; + } + + // The list of arguments relevant for this routine + static std::vector GetOptions() { + return {kArgM, kArgN, kArgK, kArgAlpha, kArgBeta, kArgFraction}; + } + + // Tests for valid arguments + static void TestValidArguments(const Arguments &) { } + + // Sets the default values for the arguments + static size_t DefaultM() { return 128; } + static size_t DefaultN() { return 128; } + static size_t DefaultK() { return 128; } + static double DefaultFraction() { return (V==1) ? 1.0 : 16.0; } // test all or sample randomly + + // Describes how to obtain the sizes of the buffers + static size_t GetSizeX(const Arguments &) { return 1; } // N/A for this kernel + static size_t GetSizeY(const Arguments &) { return 1; } // N/A for this kernel + static size_t GetSizeA(const Arguments &args) { return args.m * args.k; } + static size_t GetSizeB(const Arguments &args) { return args.n * args.k; } + static size_t GetSizeC(const Arguments &args) { return args.m * args.n; } + static size_t GetSizeTemp(const Arguments &) { return 1; } // N/A for this kernel + + // Sets the tuning parameters and their possible values + static void SetParameters(cltune::Tuner &tuner, const size_t id) { + if (V==1) { // limited subset of tuning parameters - but explorable exhaustively + tuner.AddParameter(id, "WGD", {8, 16, 32}); + tuner.AddParameter(id, "MDIMCD", {8, 16, 32}); + tuner.AddParameter(id, "NDIMCD", {8, 16, 32}); + tuner.AddParameter(id, "MDIMAD", {8, 16, 32}); + tuner.AddParameter(id, "NDIMBD", {8, 16, 32}); + tuner.AddParameter(id, "KWID", {2}); + tuner.AddParameter(id, "VWMD", {1, 2, 4, 8}); + tuner.AddParameter(id, "VWND", {1, 2, 4, 8}); + } // a lot more tuning parameters - has to be sampled randomly, too much to test all + else { + tuner.AddParameter(id, "WGD", {8, 16, 32, 64, 128}); + tuner.AddParameter(id, "MDIMCD", {8, 16, 32}); + tuner.AddParameter(id, "NDIMCD", {8, 16, 32}); + tuner.AddParameter(id, "MDIMAD", {8, 16, 32}); + tuner.AddParameter(id, "NDIMBD", {8, 16, 32}); + tuner.AddParameter(id, "KWID", {2, 8, 16}); + tuner.AddParameter(id, "VWMD", {1, 2, 4, 8}); + tuner.AddParameter(id, "VWND", {1, 2, 4, 8}); + } + } + + // Sets the constraints + static void SetConstraints(cltune::Tuner &tuner, const size_t id) { + auto MultipleOfX = [] (std::vector v) { return IsMultiple(v[0], v[1]); }; + auto MultipleOfXMulY = [] (std::vector v) { return IsMultiple(v[0], v[1]*v[2]); }; + auto MultipleOfXMulYDivZ = [] (std::vector v) { return IsMultiple(v[0], (v[1]*v[2])/v[3]); }; + // Requirement for unrolling the WGD loop + tuner.AddConstraint(id, MultipleOfX, {"WGD", "KWID"}); + // Required for integer MWID and NWID + tuner.AddConstraint(id, MultipleOfXMulY, {"WGD", "MDIMCD", "VWMD"}); + tuner.AddConstraint(id, MultipleOfXMulY, {"WGD", "NDIMCD", "VWND"}); + // Required for integer MWIAD and NWIBD + tuner.AddConstraint(id, MultipleOfXMulY, {"WGD", "MDIMAD", "VWMD"}); + tuner.AddConstraint(id, MultipleOfXMulY, {"WGD", "NDIMBD", "VWND"}); + // WGD has to be a multiple of KDIMAD = ((MDIMCD*NDIMCD)/(MDIMAD)) and KDIMBD = (...) + tuner.AddConstraint(id, MultipleOfXMulYDivZ, {"WGD", "MDIMCD", "NDIMCD", "MDIMAD"}); + tuner.AddConstraint(id, MultipleOfXMulYDivZ, {"WGD", "MDIMCD", "NDIMCD", "NDIMBD"}); + + // Extra constraints for variation 1 to limit the set of options significantly + if (V==1) { + auto IsEqual = [] (std::vector v) { return v[0] == v[1]; }; + tuner.AddConstraint(id, IsEqual, {"MDIMCD", "MDIMAD"}); + tuner.AddConstraint(id, IsEqual, {"NDIMCD", "NDIMBD"}); + } + } + + // Sets the local memory size + static void SetLocalMemorySize(cltune::Tuner &tuner, const size_t id, const Arguments &args) { + auto LocalMemorySize = [args] (std::vector v) { + return ((v[0]*v[1] + v[2]*v[3])*GetBytes(args.precision)); + }; + tuner.SetLocalMemoryUsage(id, LocalMemorySize, {"WGD", "WGD", "WGD", "WGD"}); + } + + // Sets the base thread configuration + static std::vector GlobalSize(const Arguments &args) { return {args.m, args.n}; } + static std::vector GlobalSizeRef(const Arguments &args) { return GlobalSize(args); } + static std::vector LocalSize() { return {1, 1}; } + static std::vector LocalSizeRef() { return {8, 8}; } + + // Transforms the thread configuration based on the parameters + using TransformVector = std::vector>; + static TransformVector MulLocal() { return {{"MDIMCD", "NDIMCD"}}; } + static TransformVector DivLocal() { return {}; } + static TransformVector MulGlobal() { return {{"MDIMCD", "NDIMCD"}}; } + static TransformVector DivGlobal() { return {{"WGD", "WGD"}}; } + + // Sets the kernel's arguments + static void SetArguments(cltune::Tuner &tuner, const Arguments &args, + std::vector &, std::vector &, + std::vector &a_mat, std::vector &b_mat, std::vector &c_mat, + std::vector &) { + tuner.AddArgumentScalar(static_cast(args.m)); + tuner.AddArgumentScalar(static_cast(args.n)); + tuner.AddArgumentScalar(static_cast(args.k)); + tuner.AddArgumentScalar(GetRealArg(args.alpha)); + tuner.AddArgumentScalar(GetRealArg(args.beta)); + tuner.AddArgumentInput(a_mat); + tuner.AddArgumentScalar(0); // a_offset + tuner.AddArgumentScalar(static_cast(args.k)); // a_ld + tuner.AddArgumentInput(b_mat); + tuner.AddArgumentScalar(0); // b_offset + tuner.AddArgumentScalar(static_cast(args.n)); // b_ld + tuner.AddArgumentOutput(c_mat); + tuner.AddArgumentScalar(0); // c_offset + tuner.AddArgumentScalar(static_cast(args.n)); // c_ld + tuner.AddArgumentScalar(1); // a_do_transpose + tuner.AddArgumentScalar(1); // b_do_transpose + tuner.AddArgumentScalar(1); // c_do_transpose + tuner.AddArgumentScalar(0); // a_conjugate + tuner.AddArgumentScalar(0); // b_conjugate + } + + // Describes how to compute the performance metrics + static size_t GetMetric(const Arguments &args) { + return 2 * args.m * args.n * args.k; + } + static std::string PerformanceUnit() { return "GFLOPS"; } +}; + +// ================================================================================================= +} // namespace clblast + +// Shortcuts to the clblast namespace +using float2 = clblast::float2; +using double2 = clblast::double2; + +// Function to tune a specific variation V (not within the clblast namespace) +template +void StartVariation(int argc, char *argv[]) { + switch(clblast::GetPrecision(argc, argv)) { + case clblast::Precision::kHalf: clblast::Tuner, half>(argc, argv); break; + case clblast::Precision::kSingle: clblast::Tuner, float>(argc, argv); break; + case clblast::Precision::kDouble: clblast::Tuner, double>(argc, argv); break; + case clblast::Precision::kComplexSingle: clblast::Tuner, float2>(argc, argv); break; + case clblast::Precision::kComplexDouble: clblast::Tuner, double2>(argc, argv); break; + } +} + +// Main function (not within the clblast namespace) +int main(int argc, char *argv[]) { + StartVariation<1>(argc, argv); + StartVariation<2>(argc, argv); + return 0; +} + +// ================================================================================================= -- cgit v1.2.3 From a45992010591bfbf46fdc99496e68982cad163b9 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sat, 1 Oct 2016 16:58:53 +0200 Subject: Added padding to the local memory of the GEMM direct kernel --- src/database/kernels/xgemm_direct.hpp | 10 +- src/kernels/level3/xgemm_direct.opencl | 173 ++++++++++++++++++--------------- src/tuning/kernels/xgemm_direct.cpp | 10 +- 3 files changed, 106 insertions(+), 87 deletions(-) (limited to 'src/database/kernels') diff --git a/src/database/kernels/xgemm_direct.hpp b/src/database/kernels/xgemm_direct.hpp index dc69f61b..bc91fdc2 100644 --- a/src/database/kernels/xgemm_direct.hpp +++ b/src/database/kernels/xgemm_direct.hpp @@ -18,7 +18,7 @@ const Database::DatabaseEntry Database::XgemmDirectHalf = { "XgemmDirect", Precision::kHalf, { { // Default kDeviceTypeAll, "default", { - { "default", { {"WGD",32}, {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",8}, {"NDIMCD",8}, {"VWMD",1}, {"VWND",1} } }, + { "default", { {"WGD",32}, {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",8}, {"NDIMCD",8}, {"VWMD",1}, {"VWND",1}, {"PADA",0}, {"PADB",0} } }, } }, } @@ -30,7 +30,7 @@ const Database::DatabaseEntry Database::XgemmDirectSingle = { "XgemmDirect", Precision::kSingle, { { // Default kDeviceTypeAll, "default", { - { "default", { {"WGD",32}, {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",8}, {"NDIMCD",8}, {"VWMD",1}, {"VWND",1} } }, + { "default", { {"WGD",32}, {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",8}, {"NDIMCD",8}, {"VWMD",1}, {"VWND",1}, {"PADA",0}, {"PADB",0} } }, } }, } @@ -42,7 +42,7 @@ const Database::DatabaseEntry Database::XgemmDirectComplexSingle = { "XgemmDirect", Precision::kComplexSingle, { { // Default kDeviceTypeAll, "default", { - { "default", { {"WGD",32}, {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",8}, {"NDIMCD",8}, {"VWMD",1}, {"VWND",1} } }, + { "default", { {"WGD",32}, {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",8}, {"NDIMCD",8}, {"VWMD",1}, {"VWND",1}, {"PADA",0}, {"PADB",0} } }, } }, } @@ -54,7 +54,7 @@ const Database::DatabaseEntry Database::XgemmDirectDouble = { "XgemmDirect", Precision::kDouble, { { // Default kDeviceTypeAll, "default", { - { "default", { {"WGD",32}, {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",8}, {"NDIMCD",8}, {"VWMD",1}, {"VWND",1} } }, + { "default", { {"WGD",32}, {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",8}, {"NDIMCD",8}, {"VWMD",1}, {"VWND",1}, {"PADA",0}, {"PADB",0} } }, } }, } @@ -66,7 +66,7 @@ const Database::DatabaseEntry Database::XgemmDirectComplexDouble = { "XgemmDirect", Precision::kComplexDouble, { { // Default kDeviceTypeAll, "default", { - { "default", { {"WGD",32}, {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",8}, {"NDIMCD",8}, {"VWMD",1}, {"VWND",1} } }, + { "default", { {"WGD",32}, {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",8}, {"NDIMCD",8}, {"VWMD",1}, {"VWND",1}, {"PADA",0}, {"PADB",0} } }, } }, } diff --git a/src/kernels/level3/xgemm_direct.opencl b/src/kernels/level3/xgemm_direct.opencl index 705ced9c..75618e8c 100644 --- a/src/kernels/level3/xgemm_direct.opencl +++ b/src/kernels/level3/xgemm_direct.opencl @@ -43,6 +43,12 @@ R"( #ifndef VWND #define VWND 1 // Vector width of matrix B #endif +#ifndef PADA + #define PADA 1 // Local memory padding for matrix A +#endif +#ifndef PADB + #define PADB 1 // Local memory padding for matrix B +#endif // Helper parameters based on the above tuning parameters #define MWID (WGD/MDIMCD) // Work per work-item (M-dimension) @@ -87,10 +93,16 @@ R"( // Caches global off-chip memory into local (shared) memory on-chip. This function is specific for // caching the A input matrix. inline void GlobalToLocalDirectA(const __global realMD* restrict agm, __local real* alm, - const int a_ld, const int a_offset, const int tid, const int kwg, + const int a_ld, const int a_offset, const int kwg, const int a_transpose, const int a_conjugate) { - const int la0 = tid % MDIMAD; - const int la1 = tid / MDIMAD; + #if MDIMCD == MDIMAD + const int la0 = get_local_id(0); + const int la1 = get_local_id(1); + #else + const int tid = get_local_id(0) + MDIMCD*get_local_id(1); + const int la0 = tid % MDIMAD; + const int la1 = tid / MDIMAD; + #endif #pragma unroll for (int mia=0; mia local (matrix A and B) - GlobalToLocalDirectA(agm, alm, a_ld, a_offset, tid, kwg, a_transpose, a_conjugate); - GlobalToLocalDirectB(bgm, blm, b_ld, b_offset, tid, kwg, b_transpose, b_conjugate); + GlobalToLocalDirectA(agm, alm, a_ld, a_offset, kwg, a_transpose, a_conjugate); + GlobalToLocalDirectB(bgm, blm, b_ld, b_offset, kwg, b_transpose, b_conjugate); barrier(CLK_LOCAL_MEM_FENCE); // Loops over all workitem tiles, unrolled by a factor KWID diff --git a/src/tuning/kernels/xgemm_direct.cpp b/src/tuning/kernels/xgemm_direct.cpp index 98714da8..6ab6d1f0 100644 --- a/src/tuning/kernels/xgemm_direct.cpp +++ b/src/tuning/kernels/xgemm_direct.cpp @@ -71,6 +71,8 @@ class TuneXgemmDirect { tuner.AddParameter(id, "KWID", {2}); tuner.AddParameter(id, "VWMD", {1, 2, 4, 8}); tuner.AddParameter(id, "VWND", {1, 2, 4, 8}); + tuner.AddParameter(id, "PADA", {1}); + tuner.AddParameter(id, "PADB", {1}); } // a lot more tuning parameters - has to be sampled randomly, too much to test all else { tuner.AddParameter(id, "WGD", {8, 16, 32, 64, 128}); @@ -81,6 +83,8 @@ class TuneXgemmDirect { tuner.AddParameter(id, "KWID", {2, 8, 16}); tuner.AddParameter(id, "VWMD", {1, 2, 4, 8}); tuner.AddParameter(id, "VWND", {1, 2, 4, 8}); + tuner.AddParameter(id, "PADA", {0, 1}); + tuner.AddParameter(id, "PADB", {0, 1}); } } @@ -112,9 +116,9 @@ class TuneXgemmDirect { // Sets the local memory size static void SetLocalMemorySize(cltune::Tuner &tuner, const size_t id, const Arguments &args) { auto LocalMemorySize = [args] (std::vector v) { - return ((v[0]*v[1] + v[2]*v[3])*GetBytes(args.precision)); + return ((v[0]*(v[0] + v[1]) + v[0]*(v[0] + v[2]))*GetBytes(args.precision)); }; - tuner.SetLocalMemoryUsage(id, LocalMemorySize, {"WGD", "WGD", "WGD", "WGD"}); + tuner.SetLocalMemoryUsage(id, LocalMemorySize, {"WGD", "PADA", "PADB"}); } // Sets the base thread configuration @@ -150,7 +154,7 @@ class TuneXgemmDirect { tuner.AddArgumentScalar(0); // c_offset tuner.AddArgumentScalar(static_cast(args.n)); // c_ld tuner.AddArgumentScalar(1); // a_do_transpose - tuner.AddArgumentScalar(1); // b_do_transpose + tuner.AddArgumentScalar(0); // b_do_transpose tuner.AddArgumentScalar(1); // c_do_transpose tuner.AddArgumentScalar(0); // a_conjugate tuner.AddArgumentScalar(0); // b_conjugate -- cgit v1.2.3 From b698e454782d6347fbd329dded24c4ef3895b566 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Thu, 6 Oct 2016 21:13:14 +0200 Subject: Added first tuning results for the single-kernel direct GEMM implementation --- CHANGELOG | 5 ++- src/database/kernels/xgemm_direct.hpp | 72 ++++++++++++++++++++++++++++++++--- 2 files changed, 69 insertions(+), 8 deletions(-) (limited to 'src/database/kernels') diff --git a/CHANGELOG b/CHANGELOG index 9adb6e64..87ecccce 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,8 +1,9 @@ Development version (next release) -- It is now possible to set OpenCL compiler options through the env variable CLBLAST_BUILD_OPTIONS -- Fixed a bug in the tests and samples related to waiting for an invalid event - Updated to version 8.0 of the CLCudaAPI C++11 OpenCL header +- Improved performance of GEMM kernels for small sizes by using a direct single-kernel implementation +- Fixed a bug in the tests and samples related to waiting for an invalid event +- Added an option to set OpenCL compiler options through the env variable CLBLAST_BUILD_OPTIONS - Added an option to run tuned kernels multiple times to average execution times - Various minor fixes and enhancements diff --git a/src/database/kernels/xgemm_direct.hpp b/src/database/kernels/xgemm_direct.hpp index bc91fdc2..202deb1f 100644 --- a/src/database/kernels/xgemm_direct.hpp +++ b/src/database/kernels/xgemm_direct.hpp @@ -7,7 +7,7 @@ // Author(s): // Database generator // -// This file populates the database with best-found tuning parameters for the 'Xgemm' kernels. +// This file populates the database with best-found tuning parameters for the 'Xgemm_Direct' kernels. // // ================================================================================================= @@ -18,7 +18,7 @@ const Database::DatabaseEntry Database::XgemmDirectHalf = { "XgemmDirect", Precision::kHalf, { { // Default kDeviceTypeAll, "default", { - { "default", { {"WGD",32}, {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",8}, {"NDIMCD",8}, {"VWMD",1}, {"VWND",1}, {"PADA",0}, {"PADB",0} } }, + { "default", { {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",8}, {"NDIMCD",8}, {"PADA",1}, {"PADB",1}, {"VWMD",4}, {"VWND",4}, {"WGD",32} } }, } }, } @@ -28,9 +28,27 @@ const Database::DatabaseEntry Database::XgemmDirectHalf = { const Database::DatabaseEntry Database::XgemmDirectSingle = { "XgemmDirect", Precision::kSingle, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",8}, {"NDIMCD",8}, {"PADA",1}, {"PADB",1}, {"VWMD",2}, {"VWND",2}, {"WGD",32} } }, + { "default", { {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",8}, {"NDIMCD",8}, {"PADA",1}, {"PADB",1}, {"VWMD",2}, {"VWND",2}, {"WGD",32} } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Iris Pro", { {"KWID",2}, {"MDIMAD",16}, {"MDIMCD",16}, {"NDIMBD",8}, {"NDIMCD",8}, {"PADA",1}, {"PADB",1}, {"VWMD",2}, {"VWND",4}, {"WGD",32} } }, + { "default", { {"KWID",2}, {"MDIMAD",16}, {"MDIMCD",16}, {"NDIMBD",8}, {"NDIMCD",8}, {"PADA",1}, {"PADB",1}, {"VWMD",2}, {"VWND",4}, {"WGD",32} } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GeForce GTX 750 Ti", { {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",8}, {"NDIMCD",8}, {"PADA",1}, {"PADB",1}, {"VWMD",4}, {"VWND",4}, {"WGD",32} } }, + { "default", { {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",8}, {"NDIMCD",8}, {"PADA",1}, {"PADB",1}, {"VWMD",4}, {"VWND",4}, {"WGD",32} } }, + } + }, { // Default kDeviceTypeAll, "default", { - { "default", { {"WGD",32}, {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",8}, {"NDIMCD",8}, {"VWMD",1}, {"VWND",1}, {"PADA",0}, {"PADB",0} } }, + { "default", { {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",8}, {"NDIMCD",8}, {"PADA",1}, {"PADB",1}, {"VWMD",4}, {"VWND",4}, {"WGD",32} } }, } }, } @@ -40,9 +58,27 @@ const Database::DatabaseEntry Database::XgemmDirectSingle = { const Database::DatabaseEntry Database::XgemmDirectComplexSingle = { "XgemmDirect", Precision::kComplexSingle, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { {"KWID",2}, {"MDIMAD",16}, {"MDIMCD",16}, {"NDIMBD",16}, {"NDIMCD",16}, {"PADA",1}, {"PADB",1}, {"VWMD",1}, {"VWND",1}, {"WGD",16} } }, + { "default", { {"KWID",2}, {"MDIMAD",16}, {"MDIMCD",16}, {"NDIMBD",16}, {"NDIMCD",16}, {"PADA",1}, {"PADB",1}, {"VWMD",1}, {"VWND",1}, {"WGD",16} } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Iris Pro", { {"KWID",2}, {"MDIMAD",16}, {"MDIMCD",16}, {"NDIMBD",8}, {"NDIMCD",8}, {"PADA",1}, {"PADB",1}, {"VWMD",2}, {"VWND",2}, {"WGD",32} } }, + { "default", { {"KWID",2}, {"MDIMAD",16}, {"MDIMCD",16}, {"NDIMBD",8}, {"NDIMCD",8}, {"PADA",1}, {"PADB",1}, {"VWMD",2}, {"VWND",2}, {"WGD",32} } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GeForce GTX 750 Ti", { {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",8}, {"NDIMCD",8}, {"PADA",1}, {"PADB",1}, {"VWMD",1}, {"VWND",2}, {"WGD",16} } }, + { "default", { {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",8}, {"NDIMCD",8}, {"PADA",1}, {"PADB",1}, {"VWMD",1}, {"VWND",2}, {"WGD",16} } }, + } + }, { // Default kDeviceTypeAll, "default", { - { "default", { {"WGD",32}, {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",8}, {"NDIMCD",8}, {"VWMD",1}, {"VWND",1}, {"PADA",0}, {"PADB",0} } }, + { "default", { {"KWID",2}, {"MDIMAD",16}, {"MDIMCD",16}, {"NDIMBD",8}, {"NDIMCD",8}, {"PADA",1}, {"PADB",1}, {"VWMD",2}, {"VWND",2}, {"WGD",32} } }, } }, } @@ -52,9 +88,21 @@ const Database::DatabaseEntry Database::XgemmDirectComplexSingle = { const Database::DatabaseEntry Database::XgemmDirectDouble = { "XgemmDirect", Precision::kDouble, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { {"KWID",2}, {"MDIMAD",16}, {"MDIMCD",16}, {"NDIMBD",8}, {"NDIMCD",8}, {"PADA",1}, {"PADB",1}, {"VWMD",1}, {"VWND",1}, {"WGD",16} } }, + { "default", { {"KWID",2}, {"MDIMAD",16}, {"MDIMCD",16}, {"NDIMBD",8}, {"NDIMCD",8}, {"PADA",1}, {"PADB",1}, {"VWMD",1}, {"VWND",1}, {"WGD",16} } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GeForce GTX 750 Ti", { {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",8}, {"NDIMCD",8}, {"PADA",1}, {"PADB",1}, {"VWMD",2}, {"VWND",2}, {"WGD",32} } }, + { "default", { {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",8}, {"NDIMCD",8}, {"PADA",1}, {"PADB",1}, {"VWMD",2}, {"VWND",2}, {"WGD",32} } }, + } + }, { // Default kDeviceTypeAll, "default", { - { "default", { {"WGD",32}, {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",8}, {"NDIMCD",8}, {"VWMD",1}, {"VWND",1}, {"PADA",0}, {"PADB",0} } }, + { "default", { {"KWID",2}, {"MDIMAD",16}, {"MDIMCD",16}, {"NDIMBD",8}, {"NDIMCD",8}, {"PADA",1}, {"PADB",1}, {"VWMD",1}, {"VWND",1}, {"WGD",16} } }, } }, } @@ -64,9 +112,21 @@ const Database::DatabaseEntry Database::XgemmDirectDouble = { const Database::DatabaseEntry Database::XgemmDirectComplexDouble = { "XgemmDirect", Precision::kComplexDouble, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { {"KWID",2}, {"MDIMAD",16}, {"MDIMCD",16}, {"NDIMBD",16}, {"NDIMCD",16}, {"PADA",1}, {"PADB",1}, {"VWMD",1}, {"VWND",1}, {"WGD",16} } }, + { "default", { {"KWID",2}, {"MDIMAD",16}, {"MDIMCD",16}, {"NDIMBD",16}, {"NDIMCD",16}, {"PADA",1}, {"PADB",1}, {"VWMD",1}, {"VWND",1}, {"WGD",16} } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GeForce GTX 750 Ti", { {"KWID",2}, {"MDIMAD",32}, {"MDIMCD",32}, {"NDIMBD",8}, {"NDIMCD",8}, {"PADA",1}, {"PADB",1}, {"VWMD",1}, {"VWND",1}, {"WGD",32} } }, + { "default", { {"KWID",2}, {"MDIMAD",32}, {"MDIMCD",32}, {"NDIMBD",8}, {"NDIMCD",8}, {"PADA",1}, {"PADB",1}, {"VWMD",1}, {"VWND",1}, {"WGD",32} } }, + } + }, { // Default kDeviceTypeAll, "default", { - { "default", { {"WGD",32}, {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",8}, {"NDIMCD",8}, {"VWMD",1}, {"VWND",1}, {"PADA",0}, {"PADB",0} } }, + { "default", { {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",8}, {"NDIMCD",8}, {"PADA",1}, {"PADB",1}, {"VWMD",1}, {"VWND",2}, {"WGD",16} } }, } }, } -- cgit v1.2.3 From 7baac46e723088bba1b6845d7dfd709563174a87 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sat, 8 Oct 2016 21:56:06 +0200 Subject: Fixed a performance bug for Intel Iris Pro GPUs due to incorrect tuning results --- src/database/kernels/xgemm.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/database/kernels') diff --git a/src/database/kernels/xgemm.hpp b/src/database/kernels/xgemm.hpp index d19c55b5..e289c542 100644 --- a/src/database/kernels/xgemm.hpp +++ b/src/database/kernels/xgemm.hpp @@ -59,8 +59,8 @@ const Database::DatabaseEntry Database::XgemmSingle = { { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"KWG",16}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",16}, {"NWG",128}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",2}, {"VWN",4} } }, { "Intel(R) HD Graphics Skylake ULT GT2", { {"KWG",32}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",128}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",1}, {"VWM",1}, {"VWN",8} } }, { "Iris", { {"KWG",16}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",128}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",4}, {"VWN",1} } }, - { "Iris Pro", { {"KWG",32}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",64}, {"SA",1}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",4}, {"VWN",4} } }, - { "default", { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, + { "Iris Pro", { {"KWG",16}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",32}, {"NDIMC",32}, {"NWG",128}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",4}, {"VWN",4} } }, + { "default", { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",16}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, } }, { // Intel accelerators -- cgit v1.2.3 From 08ee57f494648d8e64415dd8249797534f314308 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Mon, 10 Oct 2016 16:41:41 +0200 Subject: Updated the tuning results for the GTX 750 Ti GPU --- src/database/kernels/copy.hpp | 10 +++++----- src/database/kernels/pad.hpp | 8 ++++---- src/database/kernels/padtranspose.hpp | 2 +- src/database/kernels/transpose.hpp | 2 +- src/database/kernels/xaxpy.hpp | 6 +++--- src/database/kernels/xdot.hpp | 4 ++++ src/database/kernels/xgemm.hpp | 8 ++++---- src/database/kernels/xgemm_direct.hpp | 12 ++++++------ src/database/kernels/xgemv.hpp | 6 +++--- src/database/kernels/xgemv_fast.hpp | 5 ++--- src/database/kernels/xgemv_fast_rot.hpp | 6 ++++-- src/database/kernels/xger.hpp | 8 ++++++-- 12 files changed, 43 insertions(+), 34 deletions(-) (limited to 'src/database/kernels') diff --git a/src/database/kernels/copy.hpp b/src/database/kernels/copy.hpp index a6b7dfe8..2eab308d 100644 --- a/src/database/kernels/copy.hpp +++ b/src/database/kernels/copy.hpp @@ -84,7 +84,7 @@ const Database::DatabaseEntry Database::CopySingle = { { "GeForce GTX 670", { {"COPY_DIMX",16}, {"COPY_DIMY",32}, {"COPY_VW",4}, {"COPY_WPT",1} } }, { "GeForce GTX 680", { {"COPY_DIMX",32}, {"COPY_DIMY",16}, {"COPY_VW",4}, {"COPY_WPT",1} } }, { "GeForce GTX 750", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",2} } }, - { "GeForce GTX 750 Ti", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } }, + { "GeForce GTX 750 Ti", { {"COPY_DIMX",16}, {"COPY_DIMY",32}, {"COPY_VW",2}, {"COPY_WPT",2} } }, { "GeForce GTX 980", { {"COPY_DIMX",32}, {"COPY_DIMY",16}, {"COPY_VW",1}, {"COPY_WPT",1} } }, { "GeForce GTX TITAN", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",4} } }, { "GeForce GTX TITAN X", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } }, @@ -147,7 +147,7 @@ const Database::DatabaseEntry Database::CopyComplexSingle = { { "GeForce GTX 480", { {"COPY_DIMX",16}, {"COPY_DIMY",16}, {"COPY_VW",1}, {"COPY_WPT",1} } }, { "GeForce GTX 670", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, { "GeForce GTX 750", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } }, - { "GeForce GTX 750 Ti", { {"COPY_DIMX",32}, {"COPY_DIMY",16}, {"COPY_VW",1}, {"COPY_WPT",1} } }, + { "GeForce GTX 750 Ti", { {"COPY_DIMX",16}, {"COPY_DIMY",32}, {"COPY_VW",1}, {"COPY_WPT",1} } }, { "GeForce GTX 980", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, { "GeForce GTX TITAN X", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, { "Tesla K20m", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",4} } }, @@ -205,13 +205,13 @@ const Database::DatabaseEntry Database::CopyDouble = { { "GeForce GTX 670", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } }, { "GeForce GTX 680", { {"COPY_DIMX",16}, {"COPY_DIMY",32}, {"COPY_VW",2}, {"COPY_WPT",1} } }, { "GeForce GTX 750", { {"COPY_DIMX",8}, {"COPY_DIMY",16}, {"COPY_VW",2}, {"COPY_WPT",1} } }, - { "GeForce GTX 750 Ti", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } }, + { "GeForce GTX 750 Ti", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } }, { "GeForce GTX 980", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } }, { "GeForce GTX TITAN", { {"COPY_DIMX",16}, {"COPY_DIMY",32}, {"COPY_VW",2}, {"COPY_WPT",2} } }, { "GeForce GTX TITAN X", { {"COPY_DIMX",32}, {"COPY_DIMY",16}, {"COPY_VW",1}, {"COPY_WPT",1} } }, { "Tesla K20m", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } }, { "Tesla K40m", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",2} } }, - { "default", { {"COPY_DIMX",32}, {"COPY_DIMY",32}, {"COPY_VW",2}, {"COPY_WPT",1} } }, + { "default", { {"COPY_DIMX",32}, {"COPY_DIMY",16}, {"COPY_VW",2}, {"COPY_WPT",1} } }, } }, { // Default @@ -264,7 +264,7 @@ const Database::DatabaseEntry Database::CopyComplexDouble = { { "GeForce GTX 670", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, { "GeForce GTX 680", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, { "GeForce GTX 750", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, - { "GeForce GTX 750 Ti", { {"COPY_DIMX",32}, {"COPY_DIMY",16}, {"COPY_VW",1}, {"COPY_WPT",1} } }, + { "GeForce GTX 750 Ti", { {"COPY_DIMX",16}, {"COPY_DIMY",16}, {"COPY_VW",1}, {"COPY_WPT",1} } }, { "GeForce GTX 980", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, { "GeForce GTX TITAN", { {"COPY_DIMX",16}, {"COPY_DIMY",16}, {"COPY_VW",1}, {"COPY_WPT",1} } }, { "GeForce GTX TITAN X", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, diff --git a/src/database/kernels/pad.hpp b/src/database/kernels/pad.hpp index 3cfabaf4..49393a36 100644 --- a/src/database/kernels/pad.hpp +++ b/src/database/kernels/pad.hpp @@ -84,7 +84,7 @@ const Database::DatabaseEntry Database::PadSingle = { { "GeForce GTX 670", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",2} } }, { "GeForce GTX 680", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } }, { "GeForce GTX 750", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",4}, {"PAD_WPTY",2} } }, - { "GeForce GTX 750 Ti", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } }, + { "GeForce GTX 750 Ti", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } }, { "GeForce GTX 980", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, { "GeForce GTX TITAN", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, { "GeForce GTX TITAN X", { {"PAD_DIMX",16}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, @@ -154,13 +154,13 @@ const Database::DatabaseEntry Database::PadComplexSingle = { { "GeForce GTX 670", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, { "GeForce GTX 680", { {"PAD_DIMX",16}, {"PAD_DIMY",32}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, { "GeForce GTX 750", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, - { "GeForce GTX 750 Ti", { {"PAD_DIMX",16}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "GeForce GTX 750 Ti", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, { "GeForce GTX 980", { {"PAD_DIMX",16}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, { "GeForce GTX TITAN", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, { "GeForce GTX TITAN X", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, { "Tesla K20m", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, { "Tesla K40m", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, - { "default", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, } }, { // Default @@ -272,7 +272,7 @@ const Database::DatabaseEntry Database::PadComplexDouble = { { "GeForce GTX 670", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, { "GeForce GTX 680", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, { "GeForce GTX 750", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, - { "GeForce GTX 750 Ti", { {"PAD_DIMX",32}, {"PAD_DIMY",32}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "GeForce GTX 750 Ti", { {"PAD_DIMX",16}, {"PAD_DIMY",32}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, { "GeForce GTX 980", { {"PAD_DIMX",16}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, { "GeForce GTX TITAN", { {"PAD_DIMX",8}, {"PAD_DIMY",32}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, { "GeForce GTX TITAN X", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, diff --git a/src/database/kernels/padtranspose.hpp b/src/database/kernels/padtranspose.hpp index 88bd4ea7..61951b6f 100644 --- a/src/database/kernels/padtranspose.hpp +++ b/src/database/kernels/padtranspose.hpp @@ -272,7 +272,7 @@ const Database::DatabaseEntry Database::PadtransposeComplexDouble = { { "GeForce GTX 670", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, { "GeForce GTX 680", { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",1} } }, { "GeForce GTX 750", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, - { "GeForce GTX 750 Ti", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, + { "GeForce GTX 750 Ti", { {"PADTRA_PAD",1}, {"PADTRA_TILE",8}, {"PADTRA_WPT",2} } }, { "GeForce GTX 980", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, { "GeForce GTX TITAN", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, { "GeForce GTX TITAN X", { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",1} } }, diff --git a/src/database/kernels/transpose.hpp b/src/database/kernels/transpose.hpp index 0e1b608e..7619878e 100644 --- a/src/database/kernels/transpose.hpp +++ b/src/database/kernels/transpose.hpp @@ -207,7 +207,7 @@ const Database::DatabaseEntry Database::TransposeDouble = { { "GeForce GTX 670", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } }, { "GeForce GTX 680", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } }, { "GeForce GTX 750", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, - { "GeForce GTX 750 Ti", { {"TRA_DIM",32}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, + { "GeForce GTX 750 Ti", { {"TRA_DIM",32}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } }, { "GeForce GTX 980", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } }, { "GeForce GTX TITAN", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } }, { "GeForce GTX TITAN X", { {"TRA_DIM",32}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, diff --git a/src/database/kernels/xaxpy.hpp b/src/database/kernels/xaxpy.hpp index 9c1bcd99..89c7f48a 100644 --- a/src/database/kernels/xaxpy.hpp +++ b/src/database/kernels/xaxpy.hpp @@ -84,7 +84,7 @@ const Database::DatabaseEntry Database::XaxpySingle = { { "GeForce GTX 670", { {"VW",2}, {"WGS",64}, {"WPT",1} } }, { "GeForce GTX 680", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, { "GeForce GTX 750", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, - { "GeForce GTX 750 Ti", { {"VW",1}, {"WGS",1024}, {"WPT",1} } }, + { "GeForce GTX 750 Ti", { {"VW",2}, {"WGS",64}, {"WPT",1} } }, { "GeForce GTX 980", { {"VW",1}, {"WGS",1024}, {"WPT",1} } }, { "GeForce GTX TITAN", { {"VW",4}, {"WGS",256}, {"WPT",1} } }, { "GeForce GTX TITAN X", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, @@ -213,7 +213,7 @@ const Database::DatabaseEntry Database::XaxpyDouble = { { "GeForce GTX 670", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, { "GeForce GTX 680", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, { "GeForce GTX 750", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, - { "GeForce GTX 750 Ti", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, + { "GeForce GTX 750 Ti", { {"VW",1}, {"WGS",256}, {"WPT",2} } }, { "GeForce GTX 980", { {"VW",1}, {"WGS",256}, {"WPT",1} } }, { "GeForce GTX TITAN", { {"VW",2}, {"WGS",1024}, {"WPT",1} } }, { "GeForce GTX TITAN X", { {"VW",1}, {"WGS",512}, {"WPT",1} } }, @@ -272,7 +272,7 @@ const Database::DatabaseEntry Database::XaxpyComplexDouble = { { "GeForce GTX 670", { {"VW",1}, {"WGS",256}, {"WPT",1} } }, { "GeForce GTX 680", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, { "GeForce GTX 750", { {"VW",1}, {"WGS",1024}, {"WPT",1} } }, - { "GeForce GTX 750 Ti", { {"VW",1}, {"WGS",256}, {"WPT",2} } }, + { "GeForce GTX 750 Ti", { {"VW",1}, {"WGS",64}, {"WPT",2} } }, { "GeForce GTX 980", { {"VW",1}, {"WGS",1024}, {"WPT",1} } }, { "GeForce GTX TITAN", { {"VW",1}, {"WGS",64}, {"WPT",4} } }, { "GeForce GTX TITAN X", { {"VW",1}, {"WGS",1024}, {"WPT",1} } }, diff --git a/src/database/kernels/xdot.hpp b/src/database/kernels/xdot.hpp index 987a990d..51ca2cd7 100644 --- a/src/database/kernels/xdot.hpp +++ b/src/database/kernels/xdot.hpp @@ -68,6 +68,7 @@ const Database::DatabaseEntry Database::XdotSingle = { { "GeForce GTX 670", { {"WGS1",512}, {"WGS2",1024} } }, { "GeForce GTX 680", { {"WGS1",128}, {"WGS2",128} } }, { "GeForce GTX 750", { {"WGS1",128}, {"WGS2",32} } }, + { "GeForce GTX 750 Ti", { {"WGS1",64}, {"WGS2",32} } }, { "GeForce GTX 980", { {"WGS1",256}, {"WGS2",32} } }, { "GeForce GTX TITAN X", { {"WGS1",256}, {"WGS2",32} } }, { "Tesla K20m", { {"WGS1",1024}, {"WGS2",32} } }, @@ -119,6 +120,7 @@ const Database::DatabaseEntry Database::XdotComplexSingle = { { "GeForce GTX 670", { {"WGS1",256}, {"WGS2",32} } }, { "GeForce GTX 680", { {"WGS1",128}, {"WGS2",64} } }, { "GeForce GTX 750", { {"WGS1",64}, {"WGS2",32} } }, + { "GeForce GTX 750 Ti", { {"WGS1",64}, {"WGS2",32} } }, { "GeForce GTX 980", { {"WGS1",256}, {"WGS2",64} } }, { "GeForce GTX TITAN X", { {"WGS1",256}, {"WGS2",32} } }, { "Tesla K20m", { {"WGS1",512}, {"WGS2",32} } }, @@ -160,6 +162,7 @@ const Database::DatabaseEntry Database::XdotDouble = { { "GeForce GTX 670", { {"WGS1",256}, {"WGS2",32} } }, { "GeForce GTX 680", { {"WGS1",128}, {"WGS2",64} } }, { "GeForce GTX 750", { {"WGS1",64}, {"WGS2",256} } }, + { "GeForce GTX 750 Ti", { {"WGS1",128}, {"WGS2",64} } }, { "GeForce GTX 980", { {"WGS1",128}, {"WGS2",32} } }, { "GeForce GTX TITAN X", { {"WGS1",256}, {"WGS2",32} } }, { "Tesla K20m", { {"WGS1",512}, {"WGS2",32} } }, @@ -201,6 +204,7 @@ const Database::DatabaseEntry Database::XdotComplexDouble = { { "GeForce GTX 670", { {"WGS1",512}, {"WGS2",128} } }, { "GeForce GTX 680", { {"WGS1",256}, {"WGS2",64} } }, { "GeForce GTX 750", { {"WGS1",256}, {"WGS2",32} } }, + { "GeForce GTX 750 Ti", { {"WGS1",64}, {"WGS2",32} } }, { "GeForce GTX 980", { {"WGS1",64}, {"WGS2",32} } }, { "GeForce GTX TITAN X", { {"WGS1",128}, {"WGS2",32} } }, { "Tesla K20m", { {"WGS1",128}, {"WGS2",32} } }, diff --git a/src/database/kernels/xgemm.hpp b/src/database/kernels/xgemm.hpp index e289c542..5532ef6b 100644 --- a/src/database/kernels/xgemm.hpp +++ b/src/database/kernels/xgemm.hpp @@ -77,7 +77,7 @@ const Database::DatabaseEntry Database::XgemmSingle = { { "GeForce GTX 670", { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",2}, {"VWN",4} } }, { "GeForce GTX 680", { {"KWG",32}, {"KWI",8}, {"MDIMA",8}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",128}, {"SA",1}, {"SB",1}, {"STRM",0}, {"STRN",0}, {"VWM",4}, {"VWN",2} } }, { "GeForce GTX 750", { {"KWG",16}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",32}, {"NDIMC",8}, {"NWG",128}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",1}, {"VWN",2} } }, - { "GeForce GTX 750 Ti", { {"KWG",16}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",16}, {"MWG",128}, {"NDIMB",16}, {"NDIMC",8}, {"NWG",128}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",4}, {"VWN",4} } }, + { "GeForce GTX 750 Ti", { {"KWG",16}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",16}, {"MWG",128}, {"NDIMB",32}, {"NDIMC",8}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",0}, {"STRN",1}, {"VWM",8}, {"VWN",2} } }, { "GeForce GTX 980", { {"KWG",16}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",16}, {"NDIMC",8}, {"NWG",128}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",4}, {"VWN",8} } }, { "GeForce GTX TITAN", { {"KWG",16}, {"KWI",8}, {"MDIMA",32}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",2}, {"VWN",2} } }, { "GeForce GTX TITAN X", { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",16}, {"MWG",128}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",128}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",4}, {"VWN",8} } }, @@ -147,7 +147,7 @@ const Database::DatabaseEntry Database::XgemmComplexSingle = { { "GeForce GTX 670", { {"KWG",16}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",32}, {"MWG",64}, {"NDIMB",32}, {"NDIMC",8}, {"NWG",32}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",1}, {"VWN",1} } }, { "GeForce GTX 680", { {"KWG",16}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",32}, {"NDIMC",32}, {"NWG",128}, {"SA",1}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",2}, {"VWN",2} } }, { "GeForce GTX 750", { {"KWG",16}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",2}, {"VWN",2} } }, - { "GeForce GTX 750 Ti", { {"KWG",16}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",8}, {"MWG",128}, {"NDIMB",16}, {"NDIMC",32}, {"NWG",128}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",4} } }, + { "GeForce GTX 750 Ti", { {"KWG",16}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",2} } }, { "GeForce GTX 980", { {"KWG",32}, {"KWI",8}, {"MDIMA",32}, {"MDIMC",32}, {"MWG",64}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",2}, {"VWN",1} } }, { "GeForce GTX TITAN", { {"KWG",16}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, { "GeForce GTX TITAN X", { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",32}, {"SA",1}, {"SB",0}, {"STRM",1}, {"STRN",1}, {"VWM",1}, {"VWN",4} } }, @@ -206,7 +206,7 @@ const Database::DatabaseEntry Database::XgemmDouble = { { "GeForce GTX 670", { {"KWG",32}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",32}, {"MWG",128}, {"NDIMB",16}, {"NDIMC",8}, {"NWG",32}, {"SA",0}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, { "GeForce GTX 680", { {"KWG",32}, {"KWI",8}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",16}, {"NDIMC",32}, {"NWG",128}, {"SA",1}, {"SB",0}, {"STRM",0}, {"STRN",1}, {"VWM",2}, {"VWN",4} } }, { "GeForce GTX 750", { {"KWG",32}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",32}, {"MWG",64}, {"NDIMB",16}, {"NDIMC",8}, {"NWG",128}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",1}, {"VWM",2}, {"VWN",1} } }, - { "GeForce GTX 750 Ti", { {"KWG",16}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",16}, {"NDIMC",32}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",1}, {"VWM",2}, {"VWN",1} } }, + { "GeForce GTX 750 Ti", { {"KWG",32}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",4}, {"VWN",2} } }, { "GeForce GTX 980", { {"KWG",32}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",32}, {"NDIMC",32}, {"NWG",128}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",2}, {"VWN",4} } }, { "GeForce GTX TITAN", { {"KWG",16}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",16}, {"NDIMC",32}, {"NWG",128}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",2}, {"VWN",2} } }, { "GeForce GTX TITAN X", { {"KWG",16}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",16}, {"MWG",16}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",16}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, @@ -265,7 +265,7 @@ const Database::DatabaseEntry Database::XgemmComplexDouble = { { "GeForce GTX 670", { {"KWG",32}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",16}, {"NDIMB",16}, {"NDIMC",32}, {"NWG",64}, {"SA",1}, {"SB",0}, {"STRM",0}, {"STRN",1}, {"VWM",1}, {"VWN",2} } }, { "GeForce GTX 680", { {"KWG",16}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",16}, {"NDIMC",32}, {"NWG",32}, {"SA",0}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, { "GeForce GTX 750", { {"KWG",32}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",32}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",4} } }, - { "GeForce GTX 750 Ti", { {"KWG",16}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",16}, {"MWG",16}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",4} } }, + { "GeForce GTX 750 Ti", { {"KWG",32}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",16}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, { "GeForce GTX 980", { {"KWG",16}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",16}, {"NWG",128}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",1}, {"VWM",2}, {"VWN",2} } }, { "GeForce GTX TITAN X", { {"KWG",32}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",16}, {"MWG",128}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, { "Tesla K20m", { {"KWG",32}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, diff --git a/src/database/kernels/xgemm_direct.hpp b/src/database/kernels/xgemm_direct.hpp index 202deb1f..53dfeaa6 100644 --- a/src/database/kernels/xgemm_direct.hpp +++ b/src/database/kernels/xgemm_direct.hpp @@ -42,8 +42,8 @@ const Database::DatabaseEntry Database::XgemmDirectSingle = { }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { - { "GeForce GTX 750 Ti", { {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",8}, {"NDIMCD",8}, {"PADA",1}, {"PADB",1}, {"VWMD",4}, {"VWND",4}, {"WGD",32} } }, - { "default", { {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",8}, {"NDIMCD",8}, {"PADA",1}, {"PADB",1}, {"VWMD",4}, {"VWND",4}, {"WGD",32} } }, + { "GeForce GTX 750 Ti", { {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",8}, {"NDIMCD",8}, {"PADA",1}, {"PADB",1}, {"VWMD",4}, {"VWND",2}, {"WGD",32} } }, + { "default", { {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",8}, {"NDIMCD",8}, {"PADA",1}, {"PADB",1}, {"VWMD",4}, {"VWND",2}, {"WGD",32} } }, } }, { // Default @@ -72,8 +72,8 @@ const Database::DatabaseEntry Database::XgemmDirectComplexSingle = { }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { - { "GeForce GTX 750 Ti", { {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",8}, {"NDIMCD",8}, {"PADA",1}, {"PADB",1}, {"VWMD",1}, {"VWND",2}, {"WGD",16} } }, - { "default", { {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",8}, {"NDIMCD",8}, {"PADA",1}, {"PADB",1}, {"VWMD",1}, {"VWND",2}, {"WGD",16} } }, + { "GeForce GTX 750 Ti", { {"KWID",16}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",16}, {"NDIMCD",8}, {"PADA",1}, {"PADB",1}, {"VWMD",2}, {"VWND",1}, {"WGD",16} } }, + { "default", { {"KWID",16}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",16}, {"NDIMCD",8}, {"PADA",1}, {"PADB",1}, {"VWMD",2}, {"VWND",1}, {"WGD",16} } }, } }, { // Default @@ -96,8 +96,8 @@ const Database::DatabaseEntry Database::XgemmDirectDouble = { }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { - { "GeForce GTX 750 Ti", { {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",8}, {"NDIMCD",8}, {"PADA",1}, {"PADB",1}, {"VWMD",2}, {"VWND",2}, {"WGD",32} } }, - { "default", { {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",8}, {"NDIMCD",8}, {"PADA",1}, {"PADB",1}, {"VWMD",2}, {"VWND",2}, {"WGD",32} } }, + { "GeForce GTX 750 Ti", { {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",8}, {"NDIMCD",8}, {"PADA",1}, {"PADB",1}, {"VWMD",2}, {"VWND",4}, {"WGD",32} } }, + { "default", { {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",8}, {"NDIMCD",8}, {"PADA",1}, {"PADB",1}, {"VWMD",2}, {"VWND",4}, {"WGD",32} } }, } }, { // Default diff --git a/src/database/kernels/xgemv.hpp b/src/database/kernels/xgemv.hpp index e5e8845e..a4a339dc 100644 --- a/src/database/kernels/xgemv.hpp +++ b/src/database/kernels/xgemv.hpp @@ -77,7 +77,7 @@ const Database::DatabaseEntry Database::XgemvSingle = { { "GeForce GTX 670", { {"WGS1",64}, {"WPT1",1} } }, { "GeForce GTX 680", { {"WGS1",256}, {"WPT1",1} } }, { "GeForce GTX 750", { {"WGS1",256}, {"WPT1",1} } }, - { "GeForce GTX 750 Ti", { {"WGS1",256}, {"WPT1",1} } }, + { "GeForce GTX 750 Ti", { {"WGS1",32}, {"WPT1",1} } }, { "GeForce GTX 980", { {"WGS1",128}, {"WPT1",1} } }, { "GeForce GTX TITAN", { {"WGS1",256}, {"WPT1",1} } }, { "GeForce GTX TITAN X", { {"WGS1",256}, {"WPT1",1} } }, @@ -140,7 +140,7 @@ const Database::DatabaseEntry Database::XgemvComplexSingle = { { "GeForce GTX 670", { {"WGS1",64}, {"WPT1",1} } }, { "GeForce GTX 680", { {"WGS1",64}, {"WPT1",1} } }, { "GeForce GTX 750", { {"WGS1",128}, {"WPT1",1} } }, - { "GeForce GTX 750 Ti", { {"WGS1",64}, {"WPT1",1} } }, + { "GeForce GTX 750 Ti", { {"WGS1",32}, {"WPT1",1} } }, { "GeForce GTX TITAN", { {"WGS1",256}, {"WPT1",1} } }, { "default", { {"WGS1",64}, {"WPT1",1} } }, } @@ -188,7 +188,7 @@ const Database::DatabaseEntry Database::XgemvDouble = { { "GeForce GTX 670", { {"WGS1",128}, {"WPT1",1} } }, { "GeForce GTX 680", { {"WGS1",128}, {"WPT1",1} } }, { "GeForce GTX 750", { {"WGS1",64}, {"WPT1",1} } }, - { "GeForce GTX 750 Ti", { {"WGS1",64}, {"WPT1",1} } }, + { "GeForce GTX 750 Ti", { {"WGS1",32}, {"WPT1",1} } }, { "GeForce GTX 980", { {"WGS1",64}, {"WPT1",1} } }, { "GeForce GTX TITAN", { {"WGS1",256}, {"WPT1",1} } }, { "GeForce GTX TITAN X", { {"WGS1",64}, {"WPT1",1} } }, diff --git a/src/database/kernels/xgemv_fast.hpp b/src/database/kernels/xgemv_fast.hpp index 52af628c..7e5afaae 100644 --- a/src/database/kernels/xgemv_fast.hpp +++ b/src/database/kernels/xgemv_fast.hpp @@ -77,7 +77,7 @@ const Database::DatabaseEntry Database::XgemvFastSingle = { { "GeForce GTX 670", { {"VW2",2}, {"WGS2",256}, {"WPT2",2} } }, { "GeForce GTX 680", { {"VW2",1}, {"WGS2",128}, {"WPT2",1} } }, { "GeForce GTX 750", { {"VW2",1}, {"WGS2",256}, {"WPT2",1} } }, - { "GeForce GTX 750 Ti", { {"VW2",1}, {"WGS2",256}, {"WPT2",1} } }, + { "GeForce GTX 750 Ti", { {"VW2",2}, {"WGS2",32}, {"WPT2",2} } }, { "GeForce GTX 980", { {"VW2",1}, {"WGS2",256}, {"WPT2",1} } }, { "GeForce GTX TITAN", { {"VW2",1}, {"WGS2",256}, {"WPT2",1} } }, { "GeForce GTX TITAN X", { {"VW2",1}, {"WGS2",64}, {"WPT2",1} } }, @@ -139,7 +139,6 @@ const Database::DatabaseEntry Database::XgemvFastComplexSingle = { { "GeForce GTX 480", { {"VW2",1}, {"WGS2",64}, {"WPT2",1} } }, { "GeForce GTX 670", { {"VW2",1}, {"WGS2",64}, {"WPT2",1} } }, { "GeForce GTX 680", { {"VW2",1}, {"WGS2",64}, {"WPT2",1} } }, - { "GeForce GTX 750 Ti", { {"VW2",1}, {"WGS2",64}, {"WPT2",1} } }, { "default", { {"VW2",1}, {"WGS2",64}, {"WPT2",1} } }, } }, @@ -186,7 +185,7 @@ const Database::DatabaseEntry Database::XgemvFastDouble = { { "GeForce GTX 670", { {"VW2",1}, {"WGS2",128}, {"WPT2",1} } }, { "GeForce GTX 680", { {"VW2",1}, {"WGS2",128}, {"WPT2",1} } }, { "GeForce GTX 750", { {"VW2",2}, {"WGS2",256}, {"WPT2",2} } }, - { "GeForce GTX 750 Ti", { {"VW2",1}, {"WGS2",256}, {"WPT2",1} } }, + { "GeForce GTX 750 Ti", { {"VW2",1}, {"WGS2",32}, {"WPT2",2} } }, { "GeForce GTX 980", { {"VW2",1}, {"WGS2",64}, {"WPT2",1} } }, { "GeForce GTX TITAN", { {"VW2",1}, {"WGS2",256}, {"WPT2",1} } }, { "GeForce GTX TITAN X", { {"VW2",1}, {"WGS2",128}, {"WPT2",1} } }, diff --git a/src/database/kernels/xgemv_fast_rot.hpp b/src/database/kernels/xgemv_fast_rot.hpp index 328094e1..58854189 100644 --- a/src/database/kernels/xgemv_fast_rot.hpp +++ b/src/database/kernels/xgemv_fast_rot.hpp @@ -51,8 +51,9 @@ const Database::DatabaseEntry Database::XgemvFastRotSingle = { }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { + { "GeForce GTX 750 Ti", { {"VW3",8}, {"WGS3",32}, {"WPT3",32} } }, { "GeForce GTX TITAN", { {"VW3",1}, {"WGS3",16}, {"WPT3",16} } }, - { "default", { {"VW3",1}, {"WGS3",16}, {"WPT3",16} } }, + { "default", { {"VW3",8}, {"WGS3",32}, {"WPT3",32} } }, } }, { // Default @@ -114,8 +115,9 @@ const Database::DatabaseEntry Database::XgemvFastRotDouble = { }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { + { "GeForce GTX 750 Ti", { {"VW3",4}, {"WGS3",32}, {"WPT3",16} } }, { "GeForce GTX TITAN", { {"VW3",1}, {"WGS3",16}, {"WPT3",16} } }, - { "default", { {"VW3",1}, {"WGS3",16}, {"WPT3",16} } }, + { "default", { {"VW3",4}, {"WGS3",32}, {"WPT3",16} } }, } }, { // Default diff --git a/src/database/kernels/xger.hpp b/src/database/kernels/xger.hpp index 3e9c25c1..d057c65c 100644 --- a/src/database/kernels/xger.hpp +++ b/src/database/kernels/xger.hpp @@ -76,8 +76,9 @@ const Database::DatabaseEntry Database::XgerSingle = { { "GeForce GTX 670", { {"WGS1",32}, {"WGS2",8}, {"WPT",2} } }, { "GeForce GTX 680", { {"WGS1",128}, {"WGS2",1}, {"WPT",4} } }, { "GeForce GTX 750", { {"WGS1",64}, {"WGS2",16}, {"WPT",4} } }, + { "GeForce GTX 750 Ti", { {"WGS1",64}, {"WGS2",1}, {"WPT",2} } }, { "GeForce GTX TITAN", { {"WGS1",32}, {"WGS2",4}, {"WPT",2} } }, - { "default", { {"WGS1",256}, {"WGS2",1}, {"WPT",4} } }, + { "default", { {"WGS1",128}, {"WGS2",1}, {"WPT",2} } }, } }, { // Default @@ -133,6 +134,7 @@ const Database::DatabaseEntry Database::XgerComplexSingle = { { "GeForce GTX 670", { {"WGS1",16}, {"WGS2",32}, {"WPT",2} } }, { "GeForce GTX 680", { {"WGS1",32}, {"WGS2",4}, {"WPT",2} } }, { "GeForce GTX 750", { {"WGS1",32}, {"WGS2",16}, {"WPT",4} } }, + { "GeForce GTX 750 Ti", { {"WGS1",32}, {"WGS2",8}, {"WPT",2} } }, { "GeForce GTX TITAN", { {"WGS1",16}, {"WGS2",16}, {"WPT",2} } }, { "default", { {"WGS1",64}, {"WGS2",2}, {"WPT",2} } }, } @@ -180,8 +182,9 @@ const Database::DatabaseEntry Database::XgerDouble = { { "GeForce GTX 670", { {"WGS1",32}, {"WGS2",32}, {"WPT",2} } }, { "GeForce GTX 680", { {"WGS1",128}, {"WGS2",4}, {"WPT",2} } }, { "GeForce GTX 750", { {"WGS1",256}, {"WGS2",2}, {"WPT",2} } }, + { "GeForce GTX 750 Ti", { {"WGS1",32}, {"WGS2",16}, {"WPT",1} } }, { "GeForce GTX TITAN", { {"WGS1",16}, {"WGS2",8}, {"WPT",2} } }, - { "default", { {"WGS1",256}, {"WGS2",2}, {"WPT",2} } }, + { "default", { {"WGS1",64}, {"WGS2",2}, {"WPT",2} } }, } }, { // Default @@ -227,6 +230,7 @@ const Database::DatabaseEntry Database::XgerComplexDouble = { { "GeForce GTX 670", { {"WGS1",8}, {"WGS2",16}, {"WPT",2} } }, { "GeForce GTX 680", { {"WGS1",8}, {"WGS2",16}, {"WPT",1} } }, { "GeForce GTX 750", { {"WGS1",8}, {"WGS2",32}, {"WPT",4} } }, + { "GeForce GTX 750 Ti", { {"WGS1",32}, {"WGS2",8}, {"WPT",2} } }, { "GeForce GTX TITAN", { {"WGS1",32}, {"WGS2",4}, {"WPT",2} } }, { "default", { {"WGS1",16}, {"WGS2",8}, {"WPT",2} } }, } -- cgit v1.2.3 From ebb505b7836244d07c5ae37d5d9914bea761e185 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Thu, 13 Oct 2016 12:18:28 +0200 Subject: Added tuning results for Intel HD Graphics IvyBridge GPU --- CHANGELOG | 1 + README.md | 3 ++- src/database/kernels/copy.hpp | 2 ++ src/database/kernels/pad.hpp | 4 +++- src/database/kernels/padtranspose.hpp | 2 ++ src/database/kernels/transpose.hpp | 4 +++- src/database/kernels/xaxpy.hpp | 2 ++ src/database/kernels/xdot.hpp | 2 ++ src/database/kernels/xgemm.hpp | 4 +++- src/database/kernels/xgemv.hpp | 2 ++ src/database/kernels/xgemv_fast.hpp | 4 +++- src/database/kernels/xgemv_fast_rot.hpp | 2 ++ src/database/kernels/xger.hpp | 2 ++ 13 files changed, 29 insertions(+), 5 deletions(-) (limited to 'src/database/kernels') diff --git a/CHANGELOG b/CHANGELOG index 34e1736b..0d517869 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -7,6 +7,7 @@ Development version (next release) - Added an option to set OpenCL compiler options through the env variable CLBLAST_BUILD_OPTIONS - Added an option to run tuned kernels multiple times to average execution times - Various minor fixes and enhancements +- Added tuned parameters for various devices (see README) Version 0.9.0 - Updated to version 6.0 of the CLCudaAPI C++11 OpenCL header diff --git a/README.md b/README.md index a8e79c39..33282d8f 100644 --- a/README.md +++ b/README.md @@ -119,8 +119,9 @@ The CLBlast library will be tuned in the future for the most commonly used OpenC - Tahiti * Intel GPUs: - HD Graphics 530 - - HD Graphics Haswell Ultrabook GT2 Mobile - HD Graphics 5500 BroadWell U-Processor GT2 + - HD Graphics Haswell Ultrabook GT2 Mobile + - HD Graphics IvyBridge M GT2 - HD Graphics Skylake ULT GT2 - Iris - Iris Pro diff --git a/src/database/kernels/copy.hpp b/src/database/kernels/copy.hpp index 2eab308d..479c7f78 100644 --- a/src/database/kernels/copy.hpp +++ b/src/database/kernels/copy.hpp @@ -64,6 +64,7 @@ const Database::DatabaseEntry Database::CopySingle = { { "Intel(R) HD Graphics 530", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } }, { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"COPY_DIMX",32}, {"COPY_DIMY",16}, {"COPY_VW",4}, {"COPY_WPT",1} } }, { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"COPY_DIMX",32}, {"COPY_DIMY",16}, {"COPY_VW",4}, {"COPY_WPT",1} } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } }, { "Intel(R) HD Graphics Skylake ULT GT2", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } }, { "Iris", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } }, { "Iris Pro", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",4} } }, @@ -128,6 +129,7 @@ const Database::DatabaseEntry Database::CopyComplexSingle = { { "Intel(R) HD Graphics 530", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } }, { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"COPY_DIMX",16}, {"COPY_DIMY",16}, {"COPY_VW",2}, {"COPY_WPT",2} } }, { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { {"COPY_DIMX",8}, {"COPY_DIMY",32}, {"COPY_VW",2}, {"COPY_WPT",4} } }, { "Intel(R) HD Graphics Skylake ULT GT2", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",4} } }, { "Iris", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } }, { "Iris Pro", { {"COPY_DIMX",32}, {"COPY_DIMY",16}, {"COPY_VW",1}, {"COPY_WPT",4} } }, diff --git a/src/database/kernels/pad.hpp b/src/database/kernels/pad.hpp index 49393a36..48085139 100644 --- a/src/database/kernels/pad.hpp +++ b/src/database/kernels/pad.hpp @@ -64,6 +64,7 @@ const Database::DatabaseEntry Database::PadSingle = { { "Intel(R) HD Graphics 530", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",4} } }, { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",4} } }, { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } }, { "Intel(R) HD Graphics Skylake ULT GT2", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, { "Iris", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, { "Iris Pro", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, @@ -134,10 +135,11 @@ const Database::DatabaseEntry Database::PadComplexSingle = { { "Intel(R) HD Graphics 530", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, { "Intel(R) HD Graphics Skylake ULT GT2", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",4} } }, { "Iris", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",2}, {"PAD_WPTY",4} } }, { "Iris Pro", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, - { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",4} } }, + { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, } }, { // Intel accelerators diff --git a/src/database/kernels/padtranspose.hpp b/src/database/kernels/padtranspose.hpp index 61951b6f..f9448985 100644 --- a/src/database/kernels/padtranspose.hpp +++ b/src/database/kernels/padtranspose.hpp @@ -64,6 +64,7 @@ const Database::DatabaseEntry Database::PadtransposeSingle = { { "Intel(R) HD Graphics 530", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",4} } }, { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",4} } }, { "Intel(R) HD Graphics Skylake ULT GT2", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, { "Iris", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, { "Iris Pro", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, @@ -134,6 +135,7 @@ const Database::DatabaseEntry Database::PadtransposeComplexSingle = { { "Intel(R) HD Graphics 530", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, { "Intel(R) HD Graphics Skylake ULT GT2", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",4} } }, { "Iris", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, { "Iris Pro", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, diff --git a/src/database/kernels/transpose.hpp b/src/database/kernels/transpose.hpp index 7619878e..191d2e98 100644 --- a/src/database/kernels/transpose.hpp +++ b/src/database/kernels/transpose.hpp @@ -64,6 +64,7 @@ const Database::DatabaseEntry Database::TransposeSingle = { { "Intel(R) HD Graphics 530", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } }, { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } }, { "Intel(R) HD Graphics Skylake ULT GT2", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } }, { "Iris", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, { "Iris Pro", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, @@ -134,6 +135,7 @@ const Database::DatabaseEntry Database::TransposeComplexSingle = { { "Intel(R) HD Graphics 530", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } }, { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } }, { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } }, { "Intel(R) HD Graphics Skylake ULT GT2", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } }, { "Iris", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } }, { "Iris Pro", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } }, @@ -159,7 +161,7 @@ const Database::DatabaseEntry Database::TransposeComplexSingle = { }, { // Default kDeviceTypeAll, "default", { - { "default", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, + { "default", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } }, } }, } diff --git a/src/database/kernels/xaxpy.hpp b/src/database/kernels/xaxpy.hpp index 89c7f48a..70be5abc 100644 --- a/src/database/kernels/xaxpy.hpp +++ b/src/database/kernels/xaxpy.hpp @@ -64,6 +64,7 @@ const Database::DatabaseEntry Database::XaxpySingle = { { "Intel(R) HD Graphics 530", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"VW",1}, {"WGS",256}, {"WPT",1} } }, { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, { "Intel(R) HD Graphics Skylake ULT GT2", { {"VW",1}, {"WGS",512}, {"WPT",2} } }, { "Iris", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, { "Iris Pro", { {"VW",1}, {"WGS",128}, {"WPT",2} } }, @@ -134,6 +135,7 @@ const Database::DatabaseEntry Database::XaxpyComplexSingle = { { "Intel(R) HD Graphics 530", { {"VW",4}, {"WGS",64}, {"WPT",2} } }, { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, { "Intel(R) HD Graphics Skylake ULT GT2", { {"VW",2}, {"WGS",512}, {"WPT",1} } }, { "Iris", { {"VW",2}, {"WGS",128}, {"WPT",1} } }, { "Iris Pro", { {"VW",1}, {"WGS",256}, {"WPT",8} } }, diff --git a/src/database/kernels/xdot.hpp b/src/database/kernels/xdot.hpp index 51ca2cd7..96a699aa 100644 --- a/src/database/kernels/xdot.hpp +++ b/src/database/kernels/xdot.hpp @@ -55,6 +55,7 @@ const Database::DatabaseEntry Database::XdotSingle = { { "Intel(R) HD Graphics 530", { {"WGS1",64}, {"WGS2",32} } }, { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"WGS1",256}, {"WGS2",32} } }, { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"WGS1",64}, {"WGS2",32} } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { {"WGS1",512}, {"WGS2",128} } }, { "Intel(R) HD Graphics Skylake ULT GT2", { {"WGS1",64}, {"WGS2",32} } }, { "Iris Pro", { {"WGS1",512}, {"WGS2",64} } }, { "default", { {"WGS1",64}, {"WGS2",32} } }, @@ -107,6 +108,7 @@ const Database::DatabaseEntry Database::XdotComplexSingle = { { "Intel(R) HD Graphics 530", { {"WGS1",256}, {"WGS2",32} } }, { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"WGS1",256}, {"WGS2",32} } }, { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"WGS1",32}, {"WGS2",32} } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { {"WGS1",512}, {"WGS2",32} } }, { "Intel(R) HD Graphics Skylake ULT GT2", { {"WGS1",32}, {"WGS2",32} } }, { "Iris Pro", { {"WGS1",32}, {"WGS2",32} } }, { "default", { {"WGS1",32}, {"WGS2",32} } }, diff --git a/src/database/kernels/xgemm.hpp b/src/database/kernels/xgemm.hpp index 5532ef6b..ffe3dc57 100644 --- a/src/database/kernels/xgemm.hpp +++ b/src/database/kernels/xgemm.hpp @@ -57,10 +57,11 @@ const Database::DatabaseEntry Database::XgemmSingle = { { "Intel(R) HD Graphics 530", { {"KWG",32}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",128}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",4}, {"VWN",2} } }, { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"KWG",32}, {"KWI",8}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",4}, {"VWN",2} } }, { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"KWG",16}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",16}, {"NWG",128}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",2}, {"VWN",4} } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",16}, {"NDIMC",8}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",2}, {"VWN",4} } }, { "Intel(R) HD Graphics Skylake ULT GT2", { {"KWG",32}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",128}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",1}, {"VWM",1}, {"VWN",8} } }, { "Iris", { {"KWG",16}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",128}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",4}, {"VWN",1} } }, { "Iris Pro", { {"KWG",16}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",32}, {"NDIMC",32}, {"NWG",128}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",4}, {"VWN",4} } }, - { "default", { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",16}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, + { "default", { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, } }, { // Intel accelerators @@ -127,6 +128,7 @@ const Database::DatabaseEntry Database::XgemmComplexSingle = { { "Intel(R) HD Graphics 530", { {"KWG",16}, {"KWI",8}, {"MDIMA",8}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",32}, {"NDIMC",8}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",2}, {"VWN",1} } }, { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"KWG",16}, {"KWI",8}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",4}, {"VWN",4} } }, { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"KWG",32}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",2}, {"VWN",1} } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",0}, {"STRN",0}, {"VWM",4}, {"VWN",4} } }, { "Intel(R) HD Graphics Skylake ULT GT2", { {"KWG",32}, {"KWI",8}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",32}, {"SA",1}, {"SB",0}, {"STRM",0}, {"STRN",1}, {"VWM",4}, {"VWN",1} } }, { "Iris", { {"KWG",32}, {"KWI",8}, {"MDIMA",32}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, { "Iris Pro", { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",32}, {"NDIMC",8}, {"NWG",32}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",1}, {"VWN",1} } }, diff --git a/src/database/kernels/xgemv.hpp b/src/database/kernels/xgemv.hpp index a4a339dc..be6606a6 100644 --- a/src/database/kernels/xgemv.hpp +++ b/src/database/kernels/xgemv.hpp @@ -57,6 +57,7 @@ const Database::DatabaseEntry Database::XgemvSingle = { { "Intel(R) HD Graphics 530", { {"WGS1",256}, {"WPT1",1} } }, { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"WGS1",64}, {"WPT1",1} } }, { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"WGS1",64}, {"WPT1",1} } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { {"WGS1",256}, {"WPT1",1} } }, { "Intel(R) HD Graphics Skylake ULT GT2", { {"WGS1",64}, {"WPT1",1} } }, { "Iris", { {"WGS1",64}, {"WPT1",2} } }, { "Iris Pro", { {"WGS1",256}, {"WPT1",2} } }, @@ -120,6 +121,7 @@ const Database::DatabaseEntry Database::XgemvComplexSingle = { { "Intel(R) HD Graphics 530", { {"WGS1",64}, {"WPT1",1} } }, { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"WGS1",64}, {"WPT1",1} } }, { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"WGS1",128}, {"WPT1",1} } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { {"WGS1",256}, {"WPT1",1} } }, { "Intel(R) HD Graphics Skylake ULT GT2", { {"WGS1",64}, {"WPT1",1} } }, { "Iris", { {"WGS1",256}, {"WPT1",1} } }, { "Iris Pro", { {"WGS1",64}, {"WPT1",1} } }, diff --git a/src/database/kernels/xgemv_fast.hpp b/src/database/kernels/xgemv_fast.hpp index 7e5afaae..cd430dcb 100644 --- a/src/database/kernels/xgemv_fast.hpp +++ b/src/database/kernels/xgemv_fast.hpp @@ -57,10 +57,11 @@ const Database::DatabaseEntry Database::XgemvFastSingle = { { "Intel(R) HD Graphics 530", { {"VW2",1}, {"WGS2",256}, {"WPT2",1} } }, { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"VW2",2}, {"WGS2",32}, {"WPT2",2} } }, { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"VW2",4}, {"WGS2",128}, {"WPT2",4} } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { {"VW2",1}, {"WGS2",64}, {"WPT2",2} } }, { "Intel(R) HD Graphics Skylake ULT GT2", { {"VW2",1}, {"WGS2",256}, {"WPT2",1} } }, { "Iris", { {"VW2",1}, {"WGS2",128}, {"WPT2",2} } }, { "Iris Pro", { {"VW2",1}, {"WGS2",128}, {"WPT2",2} } }, - { "default", { {"VW2",2}, {"WGS2",64}, {"WPT2",2} } }, + { "default", { {"VW2",1}, {"WGS2",64}, {"WPT2",2} } }, } }, { // Intel accelerators @@ -120,6 +121,7 @@ const Database::DatabaseEntry Database::XgemvFastComplexSingle = { { "Intel(R) HD Graphics 530", { {"VW2",2}, {"WGS2",128}, {"WPT2",2} } }, { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"VW2",1}, {"WGS2",32}, {"WPT2",2} } }, { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"VW2",2}, {"WGS2",128}, {"WPT2",2} } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { {"VW2",1}, {"WGS2",32}, {"WPT2",4} } }, { "Intel(R) HD Graphics Skylake ULT GT2", { {"VW2",1}, {"WGS2",64}, {"WPT2",1} } }, { "Iris", { {"VW2",1}, {"WGS2",64}, {"WPT2",1} } }, { "Iris Pro", { {"VW2",4}, {"WGS2",128}, {"WPT2",4} } }, diff --git a/src/database/kernels/xgemv_fast_rot.hpp b/src/database/kernels/xgemv_fast_rot.hpp index 58854189..2dd7db32 100644 --- a/src/database/kernels/xgemv_fast_rot.hpp +++ b/src/database/kernels/xgemv_fast_rot.hpp @@ -44,6 +44,7 @@ const Database::DatabaseEntry Database::XgemvFastRotSingle = { kDeviceTypeGPU, "Intel", { { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"VW3",8}, {"WGS3",64}, {"WPT3",32} } }, { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"VW3",4}, {"WGS3",64}, {"WPT3",16} } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { {"VW3",2}, {"WGS3",32}, {"WPT3",16} } }, { "Intel(R) HD Graphics Skylake ULT GT2", { {"VW3",4}, {"WGS3",128}, {"WPT3",16} } }, { "Iris Pro", { {"VW3",4}, {"WGS3",32}, {"WPT3",16} } }, { "default", { {"VW3",8}, {"WGS3",32}, {"WPT3",32} } }, @@ -84,6 +85,7 @@ const Database::DatabaseEntry Database::XgemvFastRotComplexSingle = { kDeviceTypeGPU, "Intel", { { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"VW3",2}, {"WGS3",16}, {"WPT3",16} } }, { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"VW3",4}, {"WGS3",128}, {"WPT3",8} } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { {"VW3",4}, {"WGS3",32}, {"WPT3",8} } }, { "Intel(R) HD Graphics Skylake ULT GT2", { {"VW3",2}, {"WGS3",32}, {"WPT3",16} } }, { "Iris Pro", { {"VW3",4}, {"WGS3",16}, {"WPT3",16} } }, { "default", { {"VW3",2}, {"WGS3",32}, {"WPT3",8} } }, diff --git a/src/database/kernels/xger.hpp b/src/database/kernels/xger.hpp index d057c65c..7816080f 100644 --- a/src/database/kernels/xger.hpp +++ b/src/database/kernels/xger.hpp @@ -63,6 +63,7 @@ const Database::DatabaseEntry Database::XgerSingle = { { "Intel(R) HD Graphics 530", { {"WGS1",32}, {"WGS2",1}, {"WPT",2} } }, { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"WGS1",256}, {"WGS2",2}, {"WPT",2} } }, { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"WGS1",128}, {"WGS2",1}, {"WPT",2} } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { {"WGS1",64}, {"WGS2",1}, {"WPT",4} } }, { "Intel(R) HD Graphics Skylake ULT GT2", { {"WGS1",8}, {"WGS2",8}, {"WPT",4} } }, { "Iris Pro", { {"WGS1",64}, {"WGS2",1}, {"WPT",4} } }, { "default", { {"WGS1",32}, {"WGS2",4}, {"WPT",2} } }, @@ -121,6 +122,7 @@ const Database::DatabaseEntry Database::XgerComplexSingle = { { "Intel(R) HD Graphics 530", { {"WGS1",32}, {"WGS2",1}, {"WPT",2} } }, { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"WGS1",128}, {"WGS2",2}, {"WPT",1} } }, { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"WGS1",512}, {"WGS2",1}, {"WPT",1} } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { {"WGS1",256}, {"WGS2",1}, {"WPT",2} } }, { "Intel(R) HD Graphics Skylake ULT GT2", { {"WGS1",128}, {"WGS2",4}, {"WPT",2} } }, { "Iris Pro", { {"WGS1",16}, {"WGS2",2}, {"WPT",4} } }, { "default", { {"WGS1",64}, {"WGS2",1}, {"WPT",2} } }, -- cgit v1.2.3 From 0f9311d46aa06ecf9fecdd500467e5e58350adfe Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Fri, 14 Oct 2016 20:56:32 +0200 Subject: Fixed an issue with a growing database: the database is now a global variable in a namespace and its container uses const-pointers to the actual data --- scripts/database/database/clblast.py | 7 ++--- src/database/database.cpp | 48 +++++++++++++++------------------ src/database/database.hpp | 39 ++++++++++++--------------- src/database/kernel_selection.hpp | 12 +++++---- src/database/kernels/copy.hpp | 12 +++++---- src/database/kernels/pad.hpp | 12 +++++---- src/database/kernels/padtranspose.hpp | 12 +++++---- src/database/kernels/transpose.hpp | 12 +++++---- src/database/kernels/xaxpy.hpp | 12 +++++---- src/database/kernels/xdot.hpp | 12 +++++---- src/database/kernels/xgemm.hpp | 12 +++++---- src/database/kernels/xgemm_direct.hpp | 12 +++++---- src/database/kernels/xgemv.hpp | 12 +++++---- src/database/kernels/xgemv_fast.hpp | 12 +++++---- src/database/kernels/xgemv_fast_rot.hpp | 12 +++++---- src/database/kernels/xger.hpp | 12 +++++---- src/routine.cpp | 2 +- src/routine.hpp | 2 +- 18 files changed, 135 insertions(+), 119 deletions(-) (limited to 'src/database/kernels') diff --git a/scripts/database/database/clblast.py b/scripts/database/database/clblast.py index 8190f225..d89b6350 100644 --- a/scripts/database/database/clblast.py +++ b/scripts/database/database/clblast.py @@ -54,19 +54,20 @@ def get_cpp_header(family): // // This file populates the database with best-found tuning parameters for the '%s' kernels. //\n""" - % family.title() + get_cpp_separator() + "\n\nnamespace clblast {\n" + get_cpp_separator()) + % family.title() + get_cpp_separator() + \ + "\n\nnamespace clblast {\n" + "namespace database {\n" + get_cpp_separator()) def get_cpp_footer(): """Retrieves the C++ footer""" - return "\n} // namespace clblast\n" + return "\n} // namespace database\n" + "} // namespace clblast\n" def get_cpp_precision(family, precision): """Retrieves the C++ code for the start of a new precision""" precision_string = precision_to_string(precision) camelcase_name = family.title().replace("_", "") - return("\n\nconst Database::DatabaseEntry Database::%s%s = {\n \"%s\", Precision::k%s, {\n" + return("\n\nconst Database::DatabaseEntry %s%s = {\n \"%s\", Precision::k%s, {\n" % (camelcase_name, precision_string, camelcase_name, precision_string)) diff --git a/src/database/database.cpp b/src/database/database.cpp index 1198cefb..2340a89c 100644 --- a/src/database/database.cpp +++ b/src/database/database.cpp @@ -32,28 +32,22 @@ namespace clblast { // ================================================================================================= // Initializes the database -const std::vector Database::database = { - XaxpyHalf, XaxpySingle, XaxpyDouble, XaxpyComplexSingle, XaxpyComplexDouble, - XdotHalf, XdotSingle, XdotDouble, XdotComplexSingle, XdotComplexDouble, - XgemvHalf, XgemvSingle, XgemvDouble, XgemvComplexSingle, XgemvComplexDouble, - XgemvFastHalf, XgemvFastSingle, XgemvFastDouble, XgemvFastComplexSingle, XgemvFastComplexDouble, - XgemvFastRotHalf, XgemvFastRotSingle, XgemvFastRotDouble, XgemvFastRotComplexSingle, XgemvFastRotComplexDouble, - XgerHalf, XgerSingle, XgerDouble, XgerComplexSingle, XgerComplexDouble, - XgemmHalf, XgemmSingle, XgemmDouble, XgemmComplexSingle, XgemmComplexDouble, - XgemmDirectHalf, XgemmDirectSingle, XgemmDirectDouble, XgemmDirectComplexSingle, XgemmDirectComplexDouble, - CopyHalf, CopySingle, CopyDouble, CopyComplexSingle, CopyComplexDouble, - PadHalf, PadSingle, PadDouble, PadComplexSingle, PadComplexDouble, - TransposeHalf, TransposeSingle, TransposeDouble, TransposeComplexSingle, TransposeComplexDouble, - PadtransposeHalf, PadtransposeSingle, PadtransposeDouble, PadtransposeComplexSingle, PadtransposeComplexDouble, - KernelSelectionHalf, KernelSelectionSingle, KernelSelectionDouble, KernelSelectionComplexSingle, KernelSelectionComplexDouble +const std::vector Database::database = { + &database::XaxpyHalf, &database::XaxpySingle, &database::XaxpyDouble, &database::XaxpyComplexSingle, &database::XaxpyComplexDouble, + &database::XdotHalf, &database::XdotSingle, &database::XdotDouble, &database::XdotComplexSingle, &database::XdotComplexDouble, + &database::XgemvHalf, &database::XgemvSingle, &database::XgemvDouble, &database::XgemvComplexSingle, &database::XgemvComplexDouble, + &database::XgemvFastHalf, &database::XgemvFastSingle, &database::XgemvFastDouble, &database::XgemvFastComplexSingle, &database::XgemvFastComplexDouble, + &database::XgemvFastRotHalf, &database::XgemvFastRotSingle, &database::XgemvFastRotDouble, &database::XgemvFastRotComplexSingle, &database::XgemvFastRotComplexDouble, + &database::XgerHalf, &database::XgerSingle, &database::XgerDouble, &database::XgerComplexSingle, &database::XgerComplexDouble, + &database::XgemmHalf, &database::XgemmSingle, &database::XgemmDouble, &database::XgemmComplexSingle, &database::XgemmComplexDouble, + &database::XgemmDirectHalf, &database::XgemmDirectSingle, &database::XgemmDirectDouble, &database::XgemmDirectComplexSingle, &database::XgemmDirectComplexDouble, + &database::CopyHalf, &database::CopySingle, &database::CopyDouble, &database::CopyComplexSingle, &database::CopyComplexDouble, + &database::PadHalf, &database::PadSingle, &database::PadDouble, &database::PadComplexSingle, &database::PadComplexDouble, + &database::TransposeHalf, &database::TransposeSingle, &database::TransposeDouble, &database::TransposeComplexSingle, &database::TransposeComplexDouble, + &database::PadtransposeHalf, &database::PadtransposeSingle, &database::PadtransposeDouble, &database::PadtransposeComplexSingle, &database::PadtransposeComplexDouble, + &database::KernelSelectionHalf, &database::KernelSelectionSingle, &database::KernelSelectionDouble, &database::KernelSelectionComplexSingle, &database::KernelSelectionComplexDouble }; -// The OpenCL device types -const std::string Database::kDeviceTypeCPU = "CPU"; -const std::string Database::kDeviceTypeGPU = "GPU"; -const std::string Database::kDeviceTypeAccelerator = "accelerator"; -const std::string Database::kDeviceTypeAll = "default"; - // The OpenCL device vendors const std::string Database::kDeviceVendorAll = "default"; @@ -70,7 +64,7 @@ const std::unordered_map Database::kVendorNames{ // Constructor, computing device properties and populating the parameter-vector from the database. // This takes an optional overlay database in case of custom tuning or custom kernels. Database::Database(const Queue &queue, const std::vector &kernels, - const Precision precision, const std::vector &overlay): + const Precision precision, const std::vector &overlay): parameters_{} { // Finds information of the current device @@ -90,8 +84,8 @@ Database::Database(const Queue &queue, const std::vector &kernels, for (auto &kernel: kernels) { auto search_result = ParametersPtr{}; - for (auto db: { &overlay, &database }) { - search_result = Search(kernel, device_type, device_vendor, device_name, precision, *db); + for (auto &db: { database, overlay}) { + search_result = Search(kernel, device_type, device_vendor, device_name, precision, db); if (search_result) { parameters_.insert(search_result->begin(), search_result->end()); break; @@ -121,17 +115,17 @@ Database::ParametersPtr Database::Search(const std::string &this_kernel, const std::string &this_vendor, const std::string &this_device, const Precision this_precision, - const std::vector &this_database) const { + const std::vector &this_database) const { // Selects the right kernel for (auto &db: this_database) { - if (db.kernel == this_kernel && db.precision == this_precision) { + if (db->kernel == this_kernel && db->precision == this_precision) { // Searches for the right vendor and device type, or selects the default if unavailable. This // assumes that the default vendor / device type is last in the database. - for (auto &vendor: db.vendors) { + for (auto &vendor: db->vendors) { if ((vendor.name == this_vendor || vendor.name == kDeviceVendorAll) && - (vendor.type == this_type || vendor.type == kDeviceTypeAll)) { + (vendor.type == this_type || vendor.type == database::kDeviceTypeAll)) { // Searches for the right device. If the current device is unavailable, selects the vendor // default parameters. This assumes the default is last in the database. diff --git a/src/database/database.hpp b/src/database/database.hpp index 346fe089..8a3e7040 100644 --- a/src/database/database.hpp +++ b/src/database/database.hpp @@ -26,6 +26,19 @@ namespace clblast { // ================================================================================================= +// A special namespace to hold all the global constant variables (including the database entries) +namespace database { + + // The OpenCL device types + const std::string kDeviceTypeCPU = "CPU"; + const std::string kDeviceTypeGPU = "GPU"; + const std::string kDeviceTypeAccelerator = "accelerator"; + const std::string kDeviceTypeAll = "default"; + +} // namespace database + +// ================================================================================================= + // See comment at top of file for a description of the class class Database { public: @@ -50,12 +63,6 @@ class Database { std::vector vendors; }; - // The OpenCL device types - static const std::string kDeviceTypeCPU; - static const std::string kDeviceTypeGPU; - static const std::string kDeviceTypeAccelerator; - static const std::string kDeviceTypeAll; - // The OpenCL device vendors static const std::string kDeviceVendorAll; @@ -63,24 +70,11 @@ class Database { static const std::unordered_map kVendorNames; // The database consists of separate database entries, stored together in a vector - static const DatabaseEntry XaxpyHalf, XaxpySingle, XaxpyDouble, XaxpyComplexSingle, XaxpyComplexDouble; - static const DatabaseEntry XdotHalf, XdotSingle, XdotDouble, XdotComplexSingle, XdotComplexDouble; - static const DatabaseEntry XgemvHalf, XgemvSingle, XgemvDouble, XgemvComplexSingle, XgemvComplexDouble; - static const DatabaseEntry XgemvFastHalf, XgemvFastSingle, XgemvFastDouble, XgemvFastComplexSingle, XgemvFastComplexDouble; - static const DatabaseEntry XgemvFastRotHalf, XgemvFastRotSingle, XgemvFastRotDouble, XgemvFastRotComplexSingle, XgemvFastRotComplexDouble; - static const DatabaseEntry XgerHalf, XgerSingle, XgerDouble, XgerComplexSingle, XgerComplexDouble; - static const DatabaseEntry XgemmHalf, XgemmSingle, XgemmDouble, XgemmComplexSingle, XgemmComplexDouble; - static const DatabaseEntry XgemmDirectHalf, XgemmDirectSingle, XgemmDirectDouble, XgemmDirectComplexSingle, XgemmDirectComplexDouble; - static const DatabaseEntry CopyHalf, CopySingle, CopyDouble, CopyComplexSingle, CopyComplexDouble; - static const DatabaseEntry PadHalf, PadSingle, PadDouble, PadComplexSingle, PadComplexDouble; - static const DatabaseEntry TransposeHalf, TransposeSingle, TransposeDouble, TransposeComplexSingle, TransposeComplexDouble; - static const DatabaseEntry PadtransposeHalf, PadtransposeSingle, PadtransposeDouble, PadtransposeComplexSingle, PadtransposeComplexDouble; - static const DatabaseEntry KernelSelectionHalf, KernelSelectionSingle, KernelSelectionDouble, KernelSelectionComplexSingle, KernelSelectionComplexDouble; - static const std::vector database; + static const std::vector database; // The constructor with a user-provided database overlay (potentially an empty vector) explicit Database(const Queue &queue, const std::vector &routines, - const Precision precision, const std::vector &overlay); + const Precision precision, const std::vector &overlay); // Accessor of values by key size_t operator[](const std::string key) const { return parameters_.find(key)->second; } @@ -92,7 +86,8 @@ class Database { // Search method for a specified database, returning pointer (possibly a nullptr) ParametersPtr Search(const std::string &this_kernel, const std::string &this_type, const std::string &this_vendor, const std::string &this_device, - const Precision this_precision, const std::vector &db) const; + const Precision this_precision, + const std::vector &db) const; // Found parameters suitable for this device/kernel Parameters parameters_; diff --git a/src/database/kernel_selection.hpp b/src/database/kernel_selection.hpp index c9462c7a..7e5e7821 100644 --- a/src/database/kernel_selection.hpp +++ b/src/database/kernel_selection.hpp @@ -15,9 +15,10 @@ // ================================================================================================= namespace clblast { +namespace database { // ================================================================================================= -const Database::DatabaseEntry Database::KernelSelectionHalf = { +const Database::DatabaseEntry KernelSelectionHalf = { "KernelSelection", Precision::kHalf, { { // Intel GPUs kDeviceTypeGPU, "Intel", { @@ -39,7 +40,7 @@ const Database::DatabaseEntry Database::KernelSelectionHalf = { // ================================================================================================= -const Database::DatabaseEntry Database::KernelSelectionSingle = { +const Database::DatabaseEntry KernelSelectionSingle = { "KernelSelection", Precision::kSingle, { { // Intel GPUs kDeviceTypeGPU, "Intel", { @@ -61,7 +62,7 @@ const Database::DatabaseEntry Database::KernelSelectionSingle = { // ================================================================================================= -const Database::DatabaseEntry Database::KernelSelectionComplexSingle = { +const Database::DatabaseEntry KernelSelectionComplexSingle = { "KernelSelection", Precision::kComplexSingle, { { // Intel GPUs kDeviceTypeGPU, "Intel", { @@ -83,7 +84,7 @@ const Database::DatabaseEntry Database::KernelSelectionComplexSingle = { // ================================================================================================= -const Database::DatabaseEntry Database::KernelSelectionDouble = { +const Database::DatabaseEntry KernelSelectionDouble = { "KernelSelection", Precision::kDouble, { { // Intel GPUs kDeviceTypeGPU, "Intel", { @@ -105,7 +106,7 @@ const Database::DatabaseEntry Database::KernelSelectionDouble = { // ================================================================================================= -const Database::DatabaseEntry Database::KernelSelectionComplexDouble = { +const Database::DatabaseEntry KernelSelectionComplexDouble = { "KernelSelection", Precision::kComplexDouble, { { // Intel GPUs kDeviceTypeGPU, "Intel", { @@ -126,4 +127,5 @@ const Database::DatabaseEntry Database::KernelSelectionComplexDouble = { }; // ================================================================================================= +} // namespace database } // namespace clblast diff --git a/src/database/kernels/copy.hpp b/src/database/kernels/copy.hpp index 479c7f78..16aa6b3f 100644 --- a/src/database/kernels/copy.hpp +++ b/src/database/kernels/copy.hpp @@ -12,9 +12,10 @@ // ================================================================================================= namespace clblast { +namespace database { // ================================================================================================= -const Database::DatabaseEntry Database::CopyHalf = { +const Database::DatabaseEntry CopyHalf = { "Copy", Precision::kHalf, { { // Intel GPUs kDeviceTypeGPU, "Intel", { @@ -33,7 +34,7 @@ const Database::DatabaseEntry Database::CopyHalf = { // ================================================================================================= -const Database::DatabaseEntry Database::CopySingle = { +const Database::DatabaseEntry CopySingle = { "Copy", Precision::kSingle, { { // AMD GPUs kDeviceTypeGPU, "AMD", { @@ -104,7 +105,7 @@ const Database::DatabaseEntry Database::CopySingle = { // ================================================================================================= -const Database::DatabaseEntry Database::CopyComplexSingle = { +const Database::DatabaseEntry CopyComplexSingle = { "Copy", Precision::kComplexSingle, { { // AMD GPUs kDeviceTypeGPU, "AMD", { @@ -167,7 +168,7 @@ const Database::DatabaseEntry Database::CopyComplexSingle = { // ================================================================================================= -const Database::DatabaseEntry Database::CopyDouble = { +const Database::DatabaseEntry CopyDouble = { "Copy", Precision::kDouble, { { // AMD GPUs kDeviceTypeGPU, "AMD", { @@ -226,7 +227,7 @@ const Database::DatabaseEntry Database::CopyDouble = { // ================================================================================================= -const Database::DatabaseEntry Database::CopyComplexDouble = { +const Database::DatabaseEntry CopyComplexDouble = { "Copy", Precision::kComplexDouble, { { // AMD GPUs kDeviceTypeGPU, "AMD", { @@ -284,4 +285,5 @@ const Database::DatabaseEntry Database::CopyComplexDouble = { }; // ================================================================================================= +} // namespace database } // namespace clblast diff --git a/src/database/kernels/pad.hpp b/src/database/kernels/pad.hpp index 48085139..6c5e0c2f 100644 --- a/src/database/kernels/pad.hpp +++ b/src/database/kernels/pad.hpp @@ -12,9 +12,10 @@ // ================================================================================================= namespace clblast { +namespace database { // ================================================================================================= -const Database::DatabaseEntry Database::PadHalf = { +const Database::DatabaseEntry PadHalf = { "Pad", Precision::kHalf, { { // Intel GPUs kDeviceTypeGPU, "Intel", { @@ -33,7 +34,7 @@ const Database::DatabaseEntry Database::PadHalf = { // ================================================================================================= -const Database::DatabaseEntry Database::PadSingle = { +const Database::DatabaseEntry PadSingle = { "Pad", Precision::kSingle, { { // AMD GPUs kDeviceTypeGPU, "AMD", { @@ -104,7 +105,7 @@ const Database::DatabaseEntry Database::PadSingle = { // ================================================================================================= -const Database::DatabaseEntry Database::PadComplexSingle = { +const Database::DatabaseEntry PadComplexSingle = { "Pad", Precision::kComplexSingle, { { // AMD GPUs kDeviceTypeGPU, "AMD", { @@ -175,7 +176,7 @@ const Database::DatabaseEntry Database::PadComplexSingle = { // ================================================================================================= -const Database::DatabaseEntry Database::PadDouble = { +const Database::DatabaseEntry PadDouble = { "Pad", Precision::kDouble, { { // AMD GPUs kDeviceTypeGPU, "AMD", { @@ -234,7 +235,7 @@ const Database::DatabaseEntry Database::PadDouble = { // ================================================================================================= -const Database::DatabaseEntry Database::PadComplexDouble = { +const Database::DatabaseEntry PadComplexDouble = { "Pad", Precision::kComplexDouble, { { // AMD GPUs kDeviceTypeGPU, "AMD", { @@ -292,4 +293,5 @@ const Database::DatabaseEntry Database::PadComplexDouble = { }; // ================================================================================================= +} // namespace database } // namespace clblast diff --git a/src/database/kernels/padtranspose.hpp b/src/database/kernels/padtranspose.hpp index f9448985..4003ec6d 100644 --- a/src/database/kernels/padtranspose.hpp +++ b/src/database/kernels/padtranspose.hpp @@ -12,9 +12,10 @@ // ================================================================================================= namespace clblast { +namespace database { // ================================================================================================= -const Database::DatabaseEntry Database::PadtransposeHalf = { +const Database::DatabaseEntry PadtransposeHalf = { "Padtranspose", Precision::kHalf, { { // Intel GPUs kDeviceTypeGPU, "Intel", { @@ -33,7 +34,7 @@ const Database::DatabaseEntry Database::PadtransposeHalf = { // ================================================================================================= -const Database::DatabaseEntry Database::PadtransposeSingle = { +const Database::DatabaseEntry PadtransposeSingle = { "Padtranspose", Precision::kSingle, { { // AMD GPUs kDeviceTypeGPU, "AMD", { @@ -104,7 +105,7 @@ const Database::DatabaseEntry Database::PadtransposeSingle = { // ================================================================================================= -const Database::DatabaseEntry Database::PadtransposeComplexSingle = { +const Database::DatabaseEntry PadtransposeComplexSingle = { "Padtranspose", Precision::kComplexSingle, { { // AMD GPUs kDeviceTypeGPU, "AMD", { @@ -175,7 +176,7 @@ const Database::DatabaseEntry Database::PadtransposeComplexSingle = { // ================================================================================================= -const Database::DatabaseEntry Database::PadtransposeDouble = { +const Database::DatabaseEntry PadtransposeDouble = { "Padtranspose", Precision::kDouble, { { // AMD GPUs kDeviceTypeGPU, "AMD", { @@ -234,7 +235,7 @@ const Database::DatabaseEntry Database::PadtransposeDouble = { // ================================================================================================= -const Database::DatabaseEntry Database::PadtransposeComplexDouble = { +const Database::DatabaseEntry PadtransposeComplexDouble = { "Padtranspose", Precision::kComplexDouble, { { // AMD GPUs kDeviceTypeGPU, "AMD", { @@ -292,4 +293,5 @@ const Database::DatabaseEntry Database::PadtransposeComplexDouble = { }; // ================================================================================================= +} // namespace database } // namespace clblast diff --git a/src/database/kernels/transpose.hpp b/src/database/kernels/transpose.hpp index 191d2e98..c5ea50c2 100644 --- a/src/database/kernels/transpose.hpp +++ b/src/database/kernels/transpose.hpp @@ -12,9 +12,10 @@ // ================================================================================================= namespace clblast { +namespace database { // ================================================================================================= -const Database::DatabaseEntry Database::TransposeHalf = { +const Database::DatabaseEntry TransposeHalf = { "Transpose", Precision::kHalf, { { // Intel GPUs kDeviceTypeGPU, "Intel", { @@ -33,7 +34,7 @@ const Database::DatabaseEntry Database::TransposeHalf = { // ================================================================================================= -const Database::DatabaseEntry Database::TransposeSingle = { +const Database::DatabaseEntry TransposeSingle = { "Transpose", Precision::kSingle, { { // AMD GPUs kDeviceTypeGPU, "AMD", { @@ -104,7 +105,7 @@ const Database::DatabaseEntry Database::TransposeSingle = { // ================================================================================================= -const Database::DatabaseEntry Database::TransposeComplexSingle = { +const Database::DatabaseEntry TransposeComplexSingle = { "Transpose", Precision::kComplexSingle, { { // AMD GPUs kDeviceTypeGPU, "AMD", { @@ -169,7 +170,7 @@ const Database::DatabaseEntry Database::TransposeComplexSingle = { // ================================================================================================= -const Database::DatabaseEntry Database::TransposeDouble = { +const Database::DatabaseEntry TransposeDouble = { "Transpose", Precision::kDouble, { { // AMD GPUs kDeviceTypeGPU, "AMD", { @@ -228,7 +229,7 @@ const Database::DatabaseEntry Database::TransposeDouble = { // ================================================================================================= -const Database::DatabaseEntry Database::TransposeComplexDouble = { +const Database::DatabaseEntry TransposeComplexDouble = { "Transpose", Precision::kComplexDouble, { { // AMD GPUs kDeviceTypeGPU, "AMD", { @@ -280,4 +281,5 @@ const Database::DatabaseEntry Database::TransposeComplexDouble = { }; // ================================================================================================= +} // namespace database } // namespace clblast diff --git a/src/database/kernels/xaxpy.hpp b/src/database/kernels/xaxpy.hpp index 70be5abc..60471bef 100644 --- a/src/database/kernels/xaxpy.hpp +++ b/src/database/kernels/xaxpy.hpp @@ -12,9 +12,10 @@ // ================================================================================================= namespace clblast { +namespace database { // ================================================================================================= -const Database::DatabaseEntry Database::XaxpyHalf = { +const Database::DatabaseEntry XaxpyHalf = { "Xaxpy", Precision::kHalf, { { // Intel GPUs kDeviceTypeGPU, "Intel", { @@ -33,7 +34,7 @@ const Database::DatabaseEntry Database::XaxpyHalf = { // ================================================================================================= -const Database::DatabaseEntry Database::XaxpySingle = { +const Database::DatabaseEntry XaxpySingle = { "Xaxpy", Precision::kSingle, { { // AMD GPUs kDeviceTypeGPU, "AMD", { @@ -104,7 +105,7 @@ const Database::DatabaseEntry Database::XaxpySingle = { // ================================================================================================= -const Database::DatabaseEntry Database::XaxpyComplexSingle = { +const Database::DatabaseEntry XaxpyComplexSingle = { "Xaxpy", Precision::kComplexSingle, { { // AMD GPUs kDeviceTypeGPU, "AMD", { @@ -175,7 +176,7 @@ const Database::DatabaseEntry Database::XaxpyComplexSingle = { // ================================================================================================= -const Database::DatabaseEntry Database::XaxpyDouble = { +const Database::DatabaseEntry XaxpyDouble = { "Xaxpy", Precision::kDouble, { { // AMD GPUs kDeviceTypeGPU, "AMD", { @@ -234,7 +235,7 @@ const Database::DatabaseEntry Database::XaxpyDouble = { // ================================================================================================= -const Database::DatabaseEntry Database::XaxpyComplexDouble = { +const Database::DatabaseEntry XaxpyComplexDouble = { "Xaxpy", Precision::kComplexDouble, { { // AMD GPUs kDeviceTypeGPU, "AMD", { @@ -292,4 +293,5 @@ const Database::DatabaseEntry Database::XaxpyComplexDouble = { }; // ================================================================================================= +} // namespace database } // namespace clblast diff --git a/src/database/kernels/xdot.hpp b/src/database/kernels/xdot.hpp index 96a699aa..686b2839 100644 --- a/src/database/kernels/xdot.hpp +++ b/src/database/kernels/xdot.hpp @@ -12,9 +12,10 @@ // ================================================================================================= namespace clblast { +namespace database { // ================================================================================================= -const Database::DatabaseEntry Database::XdotHalf = { +const Database::DatabaseEntry XdotHalf = { "Xdot", Precision::kHalf, { { // Intel GPUs kDeviceTypeGPU, "Intel", { @@ -33,7 +34,7 @@ const Database::DatabaseEntry Database::XdotHalf = { // ================================================================================================= -const Database::DatabaseEntry Database::XdotSingle = { +const Database::DatabaseEntry XdotSingle = { "Xdot", Precision::kSingle, { { // AMD GPUs kDeviceTypeGPU, "AMD", { @@ -86,7 +87,7 @@ const Database::DatabaseEntry Database::XdotSingle = { // ================================================================================================= -const Database::DatabaseEntry Database::XdotComplexSingle = { +const Database::DatabaseEntry XdotComplexSingle = { "Xdot", Precision::kComplexSingle, { { // AMD GPUs kDeviceTypeGPU, "AMD", { @@ -139,7 +140,7 @@ const Database::DatabaseEntry Database::XdotComplexSingle = { // ================================================================================================= -const Database::DatabaseEntry Database::XdotDouble = { +const Database::DatabaseEntry XdotDouble = { "Xdot", Precision::kDouble, { { // AMD GPUs kDeviceTypeGPU, "AMD", { @@ -181,7 +182,7 @@ const Database::DatabaseEntry Database::XdotDouble = { // ================================================================================================= -const Database::DatabaseEntry Database::XdotComplexDouble = { +const Database::DatabaseEntry XdotComplexDouble = { "Xdot", Precision::kComplexDouble, { { // AMD GPUs kDeviceTypeGPU, "AMD", { @@ -222,4 +223,5 @@ const Database::DatabaseEntry Database::XdotComplexDouble = { }; // ================================================================================================= +} // namespace database } // namespace clblast diff --git a/src/database/kernels/xgemm.hpp b/src/database/kernels/xgemm.hpp index ffe3dc57..8303fa83 100644 --- a/src/database/kernels/xgemm.hpp +++ b/src/database/kernels/xgemm.hpp @@ -12,9 +12,10 @@ // ================================================================================================= namespace clblast { +namespace database { // ================================================================================================= -const Database::DatabaseEntry Database::XgemmHalf = { +const Database::DatabaseEntry XgemmHalf = { "Xgemm", Precision::kHalf, { { // Default kDeviceTypeAll, "default", { @@ -26,7 +27,7 @@ const Database::DatabaseEntry Database::XgemmHalf = { // ================================================================================================= -const Database::DatabaseEntry Database::XgemmSingle = { +const Database::DatabaseEntry XgemmSingle = { "Xgemm", Precision::kSingle, { { // AMD GPUs kDeviceTypeGPU, "AMD", { @@ -97,7 +98,7 @@ const Database::DatabaseEntry Database::XgemmSingle = { // ================================================================================================= -const Database::DatabaseEntry Database::XgemmComplexSingle = { +const Database::DatabaseEntry XgemmComplexSingle = { "Xgemm", Precision::kComplexSingle, { { // AMD GPUs kDeviceTypeGPU, "AMD", { @@ -168,7 +169,7 @@ const Database::DatabaseEntry Database::XgemmComplexSingle = { // ================================================================================================= -const Database::DatabaseEntry Database::XgemmDouble = { +const Database::DatabaseEntry XgemmDouble = { "Xgemm", Precision::kDouble, { { // AMD GPUs kDeviceTypeGPU, "AMD", { @@ -227,7 +228,7 @@ const Database::DatabaseEntry Database::XgemmDouble = { // ================================================================================================= -const Database::DatabaseEntry Database::XgemmComplexDouble = { +const Database::DatabaseEntry XgemmComplexDouble = { "Xgemm", Precision::kComplexDouble, { { // AMD GPUs kDeviceTypeGPU, "AMD", { @@ -284,4 +285,5 @@ const Database::DatabaseEntry Database::XgemmComplexDouble = { }; // ================================================================================================= +} // namespace database } // namespace clblast diff --git a/src/database/kernels/xgemm_direct.hpp b/src/database/kernels/xgemm_direct.hpp index 53dfeaa6..89499cc6 100644 --- a/src/database/kernels/xgemm_direct.hpp +++ b/src/database/kernels/xgemm_direct.hpp @@ -12,9 +12,10 @@ // ================================================================================================= namespace clblast { +namespace database { // ================================================================================================= -const Database::DatabaseEntry Database::XgemmDirectHalf = { +const Database::DatabaseEntry XgemmDirectHalf = { "XgemmDirect", Precision::kHalf, { { // Default kDeviceTypeAll, "default", { @@ -26,7 +27,7 @@ const Database::DatabaseEntry Database::XgemmDirectHalf = { // ================================================================================================= -const Database::DatabaseEntry Database::XgemmDirectSingle = { +const Database::DatabaseEntry XgemmDirectSingle = { "XgemmDirect", Precision::kSingle, { { // AMD GPUs kDeviceTypeGPU, "AMD", { @@ -56,7 +57,7 @@ const Database::DatabaseEntry Database::XgemmDirectSingle = { // ================================================================================================= -const Database::DatabaseEntry Database::XgemmDirectComplexSingle = { +const Database::DatabaseEntry XgemmDirectComplexSingle = { "XgemmDirect", Precision::kComplexSingle, { { // AMD GPUs kDeviceTypeGPU, "AMD", { @@ -86,7 +87,7 @@ const Database::DatabaseEntry Database::XgemmDirectComplexSingle = { // ================================================================================================= -const Database::DatabaseEntry Database::XgemmDirectDouble = { +const Database::DatabaseEntry XgemmDirectDouble = { "XgemmDirect", Precision::kDouble, { { // AMD GPUs kDeviceTypeGPU, "AMD", { @@ -110,7 +111,7 @@ const Database::DatabaseEntry Database::XgemmDirectDouble = { // ================================================================================================= -const Database::DatabaseEntry Database::XgemmDirectComplexDouble = { +const Database::DatabaseEntry XgemmDirectComplexDouble = { "XgemmDirect", Precision::kComplexDouble, { { // AMD GPUs kDeviceTypeGPU, "AMD", { @@ -133,4 +134,5 @@ const Database::DatabaseEntry Database::XgemmDirectComplexDouble = { }; // ================================================================================================= +} // namespace database } // namespace clblast diff --git a/src/database/kernels/xgemv.hpp b/src/database/kernels/xgemv.hpp index be6606a6..90355b96 100644 --- a/src/database/kernels/xgemv.hpp +++ b/src/database/kernels/xgemv.hpp @@ -12,9 +12,10 @@ // ================================================================================================= namespace clblast { +namespace database { // ================================================================================================= -const Database::DatabaseEntry Database::XgemvHalf = { +const Database::DatabaseEntry XgemvHalf = { "Xgemv", Precision::kHalf, { { // Intel GPUs kDeviceTypeGPU, "Intel", { @@ -33,7 +34,7 @@ const Database::DatabaseEntry Database::XgemvHalf = { // ================================================================================================= -const Database::DatabaseEntry Database::XgemvSingle = { +const Database::DatabaseEntry XgemvSingle = { "Xgemv", Precision::kSingle, { { // AMD GPUs kDeviceTypeGPU, "AMD", { @@ -97,7 +98,7 @@ const Database::DatabaseEntry Database::XgemvSingle = { // ================================================================================================= -const Database::DatabaseEntry Database::XgemvComplexSingle = { +const Database::DatabaseEntry XgemvComplexSingle = { "Xgemv", Precision::kComplexSingle, { { // AMD GPUs kDeviceTypeGPU, "AMD", { @@ -157,7 +158,7 @@ const Database::DatabaseEntry Database::XgemvComplexSingle = { // ================================================================================================= -const Database::DatabaseEntry Database::XgemvDouble = { +const Database::DatabaseEntry XgemvDouble = { "Xgemv", Precision::kDouble, { { // AMD GPUs kDeviceTypeGPU, "AMD", { @@ -209,7 +210,7 @@ const Database::DatabaseEntry Database::XgemvDouble = { // ================================================================================================= -const Database::DatabaseEntry Database::XgemvComplexDouble = { +const Database::DatabaseEntry XgemvComplexDouble = { "Xgemv", Precision::kComplexDouble, { { // AMD GPUs kDeviceTypeGPU, "AMD", { @@ -251,4 +252,5 @@ const Database::DatabaseEntry Database::XgemvComplexDouble = { }; // ================================================================================================= +} // namespace database } // namespace clblast diff --git a/src/database/kernels/xgemv_fast.hpp b/src/database/kernels/xgemv_fast.hpp index cd430dcb..8e6254ac 100644 --- a/src/database/kernels/xgemv_fast.hpp +++ b/src/database/kernels/xgemv_fast.hpp @@ -12,9 +12,10 @@ // ================================================================================================= namespace clblast { +namespace database { // ================================================================================================= -const Database::DatabaseEntry Database::XgemvFastHalf = { +const Database::DatabaseEntry XgemvFastHalf = { "XgemvFast", Precision::kHalf, { { // Intel GPUs kDeviceTypeGPU, "Intel", { @@ -33,7 +34,7 @@ const Database::DatabaseEntry Database::XgemvFastHalf = { // ================================================================================================= -const Database::DatabaseEntry Database::XgemvFastSingle = { +const Database::DatabaseEntry XgemvFastSingle = { "XgemvFast", Precision::kSingle, { { // AMD GPUs kDeviceTypeGPU, "AMD", { @@ -97,7 +98,7 @@ const Database::DatabaseEntry Database::XgemvFastSingle = { // ================================================================================================= -const Database::DatabaseEntry Database::XgemvFastComplexSingle = { +const Database::DatabaseEntry XgemvFastComplexSingle = { "XgemvFast", Precision::kComplexSingle, { { // AMD GPUs kDeviceTypeGPU, "AMD", { @@ -154,7 +155,7 @@ const Database::DatabaseEntry Database::XgemvFastComplexSingle = { // ================================================================================================= -const Database::DatabaseEntry Database::XgemvFastDouble = { +const Database::DatabaseEntry XgemvFastDouble = { "XgemvFast", Precision::kDouble, { { // AMD GPUs kDeviceTypeGPU, "AMD", { @@ -206,7 +207,7 @@ const Database::DatabaseEntry Database::XgemvFastDouble = { // ================================================================================================= -const Database::DatabaseEntry Database::XgemvFastComplexDouble = { +const Database::DatabaseEntry XgemvFastComplexDouble = { "XgemvFast", Precision::kComplexDouble, { { // AMD GPUs kDeviceTypeGPU, "AMD", { @@ -248,4 +249,5 @@ const Database::DatabaseEntry Database::XgemvFastComplexDouble = { }; // ================================================================================================= +} // namespace database } // namespace clblast diff --git a/src/database/kernels/xgemv_fast_rot.hpp b/src/database/kernels/xgemv_fast_rot.hpp index 2dd7db32..8fe45e01 100644 --- a/src/database/kernels/xgemv_fast_rot.hpp +++ b/src/database/kernels/xgemv_fast_rot.hpp @@ -12,9 +12,10 @@ // ================================================================================================= namespace clblast { +namespace database { // ================================================================================================= -const Database::DatabaseEntry Database::XgemvFastRotHalf = { +const Database::DatabaseEntry XgemvFastRotHalf = { "XgemvFastRot", Precision::kHalf, { { // Default kDeviceTypeAll, "default", { @@ -26,7 +27,7 @@ const Database::DatabaseEntry Database::XgemvFastRotHalf = { // ================================================================================================= -const Database::DatabaseEntry Database::XgemvFastRotSingle = { +const Database::DatabaseEntry XgemvFastRotSingle = { "XgemvFastRot", Precision::kSingle, { { // AMD GPUs kDeviceTypeGPU, "AMD", { @@ -67,7 +68,7 @@ const Database::DatabaseEntry Database::XgemvFastRotSingle = { // ================================================================================================= -const Database::DatabaseEntry Database::XgemvFastRotComplexSingle = { +const Database::DatabaseEntry XgemvFastRotComplexSingle = { "XgemvFastRot", Precision::kComplexSingle, { { // AMD GPUs kDeviceTypeGPU, "AMD", { @@ -101,7 +102,7 @@ const Database::DatabaseEntry Database::XgemvFastRotComplexSingle = { // ================================================================================================= -const Database::DatabaseEntry Database::XgemvFastRotDouble = { +const Database::DatabaseEntry XgemvFastRotDouble = { "XgemvFastRot", Precision::kDouble, { { // AMD GPUs kDeviceTypeGPU, "AMD", { @@ -132,7 +133,7 @@ const Database::DatabaseEntry Database::XgemvFastRotDouble = { // ================================================================================================= -const Database::DatabaseEntry Database::XgemvFastRotComplexDouble = { +const Database::DatabaseEntry XgemvFastRotComplexDouble = { "XgemvFastRot", Precision::kComplexDouble, { { // AMD GPUs kDeviceTypeGPU, "AMD", { @@ -155,4 +156,5 @@ const Database::DatabaseEntry Database::XgemvFastRotComplexDouble = { }; // ================================================================================================= +} // namespace database } // namespace clblast diff --git a/src/database/kernels/xger.hpp b/src/database/kernels/xger.hpp index 7816080f..f2fc2a9a 100644 --- a/src/database/kernels/xger.hpp +++ b/src/database/kernels/xger.hpp @@ -12,9 +12,10 @@ // ================================================================================================= namespace clblast { +namespace database { // ================================================================================================= -const Database::DatabaseEntry Database::XgerHalf = { +const Database::DatabaseEntry XgerHalf = { "Xger", Precision::kHalf, { { // Intel GPUs kDeviceTypeGPU, "Intel", { @@ -33,7 +34,7 @@ const Database::DatabaseEntry Database::XgerHalf = { // ================================================================================================= -const Database::DatabaseEntry Database::XgerSingle = { +const Database::DatabaseEntry XgerSingle = { "Xger", Precision::kSingle, { { // AMD GPUs kDeviceTypeGPU, "AMD", { @@ -92,7 +93,7 @@ const Database::DatabaseEntry Database::XgerSingle = { // ================================================================================================= -const Database::DatabaseEntry Database::XgerComplexSingle = { +const Database::DatabaseEntry XgerComplexSingle = { "Xger", Precision::kComplexSingle, { { // AMD GPUs kDeviceTypeGPU, "AMD", { @@ -151,7 +152,7 @@ const Database::DatabaseEntry Database::XgerComplexSingle = { // ================================================================================================= -const Database::DatabaseEntry Database::XgerDouble = { +const Database::DatabaseEntry XgerDouble = { "Xger", Precision::kDouble, { { // AMD GPUs kDeviceTypeGPU, "AMD", { @@ -199,7 +200,7 @@ const Database::DatabaseEntry Database::XgerDouble = { // ================================================================================================= -const Database::DatabaseEntry Database::XgerComplexDouble = { +const Database::DatabaseEntry XgerComplexDouble = { "Xger", Precision::kComplexDouble, { { // AMD GPUs kDeviceTypeGPU, "AMD", { @@ -246,4 +247,5 @@ const Database::DatabaseEntry Database::XgerComplexDouble = { }; // ================================================================================================= +} // namespace database } // namespace clblast diff --git a/src/routine.cpp b/src/routine.cpp index d938d66f..80764b74 100644 --- a/src/routine.cpp +++ b/src/routine.cpp @@ -24,7 +24,7 @@ namespace clblast { // Constructor: not much here, because no status codes can be returned Routine::Routine(Queue &queue, EventPointer event, const std::string &name, const std::vector &routines, const Precision precision, - const std::vector &userDatabase): + const std::vector &userDatabase): precision_(precision), routine_name_(name), queue_(queue), diff --git a/src/routine.hpp b/src/routine.hpp index f5c607af..8582a2b7 100644 --- a/src/routine.hpp +++ b/src/routine.hpp @@ -36,7 +36,7 @@ class Routine { // built-in database. explicit Routine(Queue &queue, EventPointer event, const std::string &name, const std::vector &routines, const Precision precision, - const std::vector &userDatabase = {}); + const std::vector &userDatabase = {}); // Set-up phase of the kernel StatusCode SetUp(); -- cgit v1.2.3