diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2016-09-12 20:13:38 +0200 |
---|---|---|
committer | Cedric Nugteren <web@cedricnugteren.nl> | 2016-09-12 20:13:38 +0200 |
commit | aa3dffe356cc3c85e4d49508a4f21f4becba6e8c (patch) | |
tree | be0853996d6d722206d678a86882d21d863cd5ab | |
parent | b5a67f86ecca72b47fc3d0a8231f902752b13c3d (diff) |
Added XgemvFastRot and Xgemm 16-bit tuning results: these are just the defaults, which are now automatically taken from the 32-bit results if there are no entries at all for a precision
-rw-r--r-- | scripts/database/database/clblast.py | 18 | ||||
-rw-r--r-- | src/database/database.cpp | 4 | ||||
-rw-r--r-- | src/database/database.hpp | 4 | ||||
-rw-r--r-- | src/database/kernels/xgemm.hpp | 12 | ||||
-rw-r--r-- | src/database/kernels/xgemv_fast_rot.hpp | 12 |
5 files changed, 43 insertions, 7 deletions
diff --git a/scripts/database/database/clblast.py b/scripts/database/database/clblast.py
index beed46d9..8190f225 100644
--- a/scripts/database/database/clblast.py
+++ b/scripts/database/database/clblast.py
@@ -82,7 +82,7 @@ def print_cpp_database(database, output_dir):
     """Outputs the database as C++ code"""
 
     # Iterates over the kernel families
-    kernel_families = [s["kernel_family"] for s in database["sections"]]
+    kernel_families = sorted(set([s["kernel_family"] for s in database["sections"]]))
     for family_name in kernel_families:
         family_database = [s for s in database["sections"] if s["kernel_family"] == family_name]
 
@@ -92,15 +92,27 @@ def print_cpp_database(database, output_dir):
             f.write(get_cpp_header(family_name))
 
             # Loops over the different precision (e.g. 16, 32, 3232, 64, 6464)
-            precisions = sorted(set([s["precision"] for s in family_database]))
+            precisions = sorted(set([s["precision"] for s in database["sections"]]))  # Based on full database
             for precision in precisions:
                 precision_database = [s for s in family_database if s["precision"] == precision]
                 f.write(get_cpp_precision(family_name, precision))
 
-                # Loops over a combination of device vendors and device types (e.g. AMD GPU)
+                # In case there is nothing found at all (e.g. 16-bit): continue as if this was a precision of 32 but
+                # with the defaults only
+                if len(precision_database) == 0:
+                    print("[database] No results found for %s:%s, retrieving defaults from %s:32" %
+                          (family_name, precision, family_name))
+                    precision_database = [s for s in family_database if s["precision"] == "32"
+                                          and s["device_vendor"] == VENDOR_DEFAULT
+                                          and s["device_type"] == DEVICE_TYPE_DEFAULT
+                                          and s["device"] == DEVICE_NAME_DEFAULT]
+
+                # Loops over device vendors (e.g. AMD)
                 device_vendors = sorted(set([s["device_vendor"] for s in precision_database]))
                 for vendor in device_vendors:
                     vendor_database = [s for s in precision_database if s["device_vendor"] == vendor]
+
+                    # Loops over device types (e.g. GPU)
                     device_types = sorted(set([s["device_type"] for s in vendor_database]))
                     for device_type in device_types:
                         type_database = [s for s in vendor_database if s["device_type"] == device_type]
diff --git a/src/database/database.cpp b/src/database/database.cpp
index 38974b95..34c44a29 100644
--- a/src/database/database.cpp
+++ b/src/database/database.cpp
@@ -35,9 +35,9 @@ const std::vector<Database::DatabaseEntry> Database::database = {
   XdotHalf, XdotSingle, XdotDouble, XdotComplexSingle, XdotComplexDouble,
   XgemvHalf, XgemvSingle, XgemvDouble, XgemvComplexSingle, XgemvComplexDouble,
   XgemvFastHalf, XgemvFastSingle, XgemvFastDouble, XgemvFastComplexSingle, XgemvFastComplexDouble,
-  /* XgemvFastRotHalf, */ XgemvFastRotSingle, XgemvFastRotDouble, XgemvFastRotComplexSingle, XgemvFastRotComplexDouble,
+  XgemvFastRotHalf, XgemvFastRotSingle, XgemvFastRotDouble, XgemvFastRotComplexSingle, XgemvFastRotComplexDouble,
   XgerHalf, XgerSingle, XgerDouble, XgerComplexSingle, XgerComplexDouble,
-  /* XgemmHalf, */ XgemmSingle, XgemmDouble, XgemmComplexSingle, XgemmComplexDouble,
+  XgemmHalf, XgemmSingle, XgemmDouble, XgemmComplexSingle, XgemmComplexDouble,
   CopyHalf, CopySingle, CopyDouble, CopyComplexSingle, CopyComplexDouble,
   PadHalf, PadSingle, PadDouble, PadComplexSingle, PadComplexDouble,
   TransposeHalf, TransposeSingle, TransposeDouble, TransposeComplexSingle, TransposeComplexDouble,
diff --git a/src/database/database.hpp b/src/database/database.hpp
index 8d6d3863..a6ab49c5 100644
--- a/src/database/database.hpp
+++ b/src/database/database.hpp
@@ -72,9 +72,9 @@ class Database {
   static const DatabaseEntry XdotHalf, XdotSingle, XdotDouble, XdotComplexSingle, XdotComplexDouble;
   static const DatabaseEntry XgemvHalf, XgemvSingle, XgemvDouble, XgemvComplexSingle, XgemvComplexDouble;
   static const DatabaseEntry XgemvFastHalf, XgemvFastSingle, XgemvFastDouble, XgemvFastComplexSingle, XgemvFastComplexDouble;
-  static const DatabaseEntry /* XgemvFastRotHalf, */ XgemvFastRotSingle, XgemvFastRotDouble, XgemvFastRotComplexSingle, XgemvFastRotComplexDouble;
+  static const DatabaseEntry XgemvFastRotHalf, XgemvFastRotSingle, XgemvFastRotDouble, XgemvFastRotComplexSingle, XgemvFastRotComplexDouble;
   static const DatabaseEntry XgerHalf, XgerSingle, XgerDouble, XgerComplexSingle, XgerComplexDouble;
-  static const DatabaseEntry /* XgemmHalf, */ XgemmSingle, XgemmDouble, XgemmComplexSingle, XgemmComplexDouble;
+  static const DatabaseEntry XgemmHalf, XgemmSingle, XgemmDouble, XgemmComplexSingle, XgemmComplexDouble;
   static const DatabaseEntry CopyHalf, CopySingle, CopyDouble, CopyComplexSingle, CopyComplexDouble;
   static const DatabaseEntry PadHalf, PadSingle, PadDouble, PadComplexSingle, PadComplexDouble;
   static const DatabaseEntry TransposeHalf, TransposeSingle, TransposeDouble, TransposeComplexSingle, TransposeComplexDouble;
diff --git a/src/database/kernels/xgemm.hpp b/src/database/kernels/xgemm.hpp
index 7e793076..d19c55b5 100644
--- a/src/database/kernels/xgemm.hpp
+++ b/src/database/kernels/xgemm.hpp
@@ -14,6 +14,18 @@
 namespace clblast {
 // =================================================================================================
 
+const Database::DatabaseEntry Database::XgemmHalf = {
+  "Xgemm", Precision::kHalf, {
+    { // Default
+      kDeviceTypeAll, "default", {
+        { "default", { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",16}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
+      }
+    },
+  }
+};
+
+// =================================================================================================
+
 const Database::DatabaseEntry Database::XgemmSingle = {
   "Xgemm", Precision::kSingle, {
     { // AMD GPUs
diff --git a/src/database/kernels/xgemv_fast_rot.hpp b/src/database/kernels/xgemv_fast_rot.hpp
index 3d2e0d3a..328094e1 100644
--- a/src/database/kernels/xgemv_fast_rot.hpp
+++ b/src/database/kernels/xgemv_fast_rot.hpp
@@ -14,6 +14,18 @@
 namespace clblast {
 // =================================================================================================
 
+const Database::DatabaseEntry Database::XgemvFastRotHalf = {
+  "XgemvFastRot", Precision::kHalf, {
+    { // Default
+      kDeviceTypeAll, "default", {
+        { "default", { {"VW3",8}, {"WGS3",32}, {"WPT3",32} } },
+      }
+    },
+  }
+};
+
+// =================================================================================================
+
 const Database::DatabaseEntry Database::XgemvFastRotSingle = {
   "XgemvFastRot", Precision::kSingle, {
     { // AMD GPUs