From b5a67f86ecca72b47fc3d0a8231f902752b13c3d Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sun, 11 Sep 2016 21:29:28 +0200 Subject: Complete re-write of the database script. Changed Pandas for the much faster and convienient plain JSON/dict data-type --- src/database/kernels/xaxpy.hpp | 4 ++-- src/database/kernels/xdot.hpp | 8 ++------ src/database/kernels/xgemm.hpp | 2 +- src/database/kernels/xgemv.hpp | 4 ++-- src/database/kernels/xgemv_fast.hpp | 4 ++-- 5 files changed, 9 insertions(+), 13 deletions(-) (limited to 'src') diff --git a/src/database/kernels/xaxpy.hpp b/src/database/kernels/xaxpy.hpp index 6e84ca5a..9c1bcd99 100644 --- a/src/database/kernels/xaxpy.hpp +++ b/src/database/kernels/xaxpy.hpp @@ -20,12 +20,12 @@ const Database::DatabaseEntry Database::XaxpyHalf = { kDeviceTypeGPU, "Intel", { { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, { "Intel(R) HD Graphics Skylake ULT GT2", { {"VW",4}, {"WGS",512}, {"WPT",8} } }, - { "default", { {"VW",4}, {"WGS",512}, {"WPT",8} } }, + { "default", { {"VW",8}, {"WGS",64}, {"WPT",1} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"VW",4}, {"WGS",512}, {"WPT",8} } }, + { "default", { {"VW",8}, {"WGS",64}, {"WPT",1} } }, } }, } diff --git a/src/database/kernels/xdot.hpp b/src/database/kernels/xdot.hpp index f862d00e..987a990d 100644 --- a/src/database/kernels/xdot.hpp +++ b/src/database/kernels/xdot.hpp @@ -38,11 +38,10 @@ const Database::DatabaseEntry Database::XdotSingle = { { // AMD GPUs kDeviceTypeGPU, "AMD", { { "AMD Radeon R9 M370X Compute Engine", { {"WGS1",128}, {"WGS2",32} } }, - { "Hawaii", { {"WGS1",256}, {"WGS2",32} } }, { "Oland", { {"WGS1",256}, {"WGS2",32} } }, { "Pitcairn", { {"WGS1",128}, {"WGS2",32} } }, { "Tahiti", { {"WGS1",128}, {"WGS2",32} } }, - { "default", { {"WGS1",256}, {"WGS2",32} } }, + { "default", { {"WGS1",128}, {"WGS2",32} } }, } }, { // Intel CPUs @@ -90,11 +89,10 @@ const Database::DatabaseEntry Database::XdotComplexSingle = { { // AMD GPUs kDeviceTypeGPU, "AMD", { { "AMD Radeon R9 M370X Compute Engine", { {"WGS1",64}, {"WGS2",32} } }, - { "Hawaii", { {"WGS1",256}, {"WGS2",32} } }, { "Oland", { {"WGS1",128}, {"WGS2",32} } }, { "Pitcairn", { {"WGS1",256}, {"WGS2",32} } }, { "Tahiti", { {"WGS1",64}, {"WGS2",32} } }, - { "default", { {"WGS1",256}, {"WGS2",32} } }, + { "default", { {"WGS1",128}, {"WGS2",32} } }, } }, { // Intel CPUs @@ -142,7 +140,6 @@ const Database::DatabaseEntry Database::XdotDouble = { { // AMD GPUs kDeviceTypeGPU, "AMD", { { "AMD Radeon R9 M370X Compute Engine", { {"WGS1",64}, {"WGS2",128} } }, - { "Hawaii", { {"WGS1",256}, {"WGS2",32} } }, { "Oland", { {"WGS1",256}, {"WGS2",32} } }, { "Pitcairn", { {"WGS1",128}, {"WGS2",32} } }, { "Tahiti", { {"WGS1",256}, {"WGS2",32} } }, @@ -184,7 +181,6 @@ const Database::DatabaseEntry Database::XdotComplexDouble = { { // AMD GPUs kDeviceTypeGPU, "AMD", { { "AMD Radeon R9 M370X Compute Engine", { {"WGS1",64}, {"WGS2",32} } }, - { "Hawaii", { {"WGS1",256}, {"WGS2",32} } }, { "Oland", { {"WGS1",256}, {"WGS2",32} } }, { "Pitcairn", { {"WGS1",256}, {"WGS2",32} } }, { "Tahiti", { {"WGS1",256}, {"WGS2",32} } }, diff --git a/src/database/kernels/xgemm.hpp b/src/database/kernels/xgemm.hpp index cc81cf6a..7e793076 100644 --- a/src/database/kernels/xgemm.hpp +++ b/src/database/kernels/xgemm.hpp @@ -76,7 +76,7 @@ const Database::DatabaseEntry Database::XgemmSingle = { }, { // Default kDeviceTypeAll, "default", { - { "default", { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, + { "default", { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",16}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, } }, } diff --git a/src/database/kernels/xgemv.hpp b/src/database/kernels/xgemv.hpp index 03e84525..e5e8845e 100644 --- a/src/database/kernels/xgemv.hpp +++ b/src/database/kernels/xgemv.hpp @@ -20,12 +20,12 @@ const Database::DatabaseEntry Database::XgemvHalf = { kDeviceTypeGPU, "Intel", { { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"WGS1",64}, {"WPT1",1} } }, { "Intel(R) HD Graphics Skylake ULT GT2", { {"WGS1",128}, {"WPT1",1} } }, - { "default", { {"WGS1",128}, {"WPT1",1} } }, + { "default", { {"WGS1",64}, {"WPT1",1} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"WGS1",128}, {"WPT1",1} } }, + { "default", { {"WGS1",64}, {"WPT1",1} } }, } }, } diff --git a/src/database/kernels/xgemv_fast.hpp b/src/database/kernels/xgemv_fast.hpp index c12fcdca..52af628c 100644 --- a/src/database/kernels/xgemv_fast.hpp +++ b/src/database/kernels/xgemv_fast.hpp @@ -20,12 +20,12 @@ const Database::DatabaseEntry Database::XgemvFastHalf = { kDeviceTypeGPU, "Intel", { { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"VW2",1}, {"WGS2",16}, {"WPT2",1} } }, { "Intel(R) HD Graphics Skylake ULT GT2", { {"VW2",2}, {"WGS2",128}, {"WPT2",2} } }, - { "default", { {"VW2",2}, {"WGS2",128}, {"WPT2",2} } }, + { "default", { {"VW2",1}, {"WGS2",16}, {"WPT2",1} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"VW2",2}, {"WGS2",128}, {"WPT2",2} } }, + { "default", { {"VW2",1}, {"WGS2",16}, {"WPT2",1} } }, } }, } -- cgit v1.2.3 From aa3dffe356cc3c85e4d49508a4f21f4becba6e8c Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Mon, 12 Sep 2016 20:13:38 +0200 Subject: Added XgemvFastRot and Xgemm 16-bit tuning results: just defaults which are now automatically taken from 32-bit if there are no entries at all --- scripts/database/database/clblast.py | 18 +++++++++++++++--- src/database/database.cpp | 4 ++-- src/database/database.hpp | 4 ++-- src/database/kernels/xgemm.hpp | 12 ++++++++++++ src/database/kernels/xgemv_fast_rot.hpp | 12 ++++++++++++ 5 files changed, 43 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/scripts/database/database/clblast.py b/scripts/database/database/clblast.py index beed46d9..8190f225 100644 --- a/scripts/database/database/clblast.py +++ b/scripts/database/database/clblast.py @@ -82,7 +82,7 @@ def print_cpp_database(database, output_dir): """Outputs the database as C++ code""" # Iterates over the kernel families - kernel_families = [s["kernel_family"] for s in database["sections"]] + kernel_families = sorted(set([s["kernel_family"] for s in database["sections"]])) for family_name in kernel_families: family_database = [s for s in database["sections"] if s["kernel_family"] == family_name] @@ -92,15 +92,27 @@ def print_cpp_database(database, output_dir): f.write(get_cpp_header(family_name)) # Loops over the different precision (e.g. 16, 32, 3232, 64, 6464) - precisions = sorted(set([s["precision"] for s in family_database])) + precisions = sorted(set([s["precision"] for s in database["sections"]])) # Based on full database for precision in precisions: precision_database = [s for s in family_database if s["precision"] == precision] f.write(get_cpp_precision(family_name, precision)) - # Loops over a combination of device vendors and device types (e.g. AMD GPU) + # In case there is nothing found at all (e.g. 16-bit): continue as if this was a precision of 32 but + # with the defaults only + if len(precision_database) == 0: + print("[database] No results found for %s:%s, retrieving defaults from %s:32" % + (family_name, precision, family_name)) + precision_database = [s for s in family_database if s["precision"] == "32" + and s["device_vendor"] == VENDOR_DEFAULT + and s["device_type"] == DEVICE_TYPE_DEFAULT + and s["device"] == DEVICE_NAME_DEFAULT] + + # Loops over device vendors (e.g. AMD) device_vendors = sorted(set([s["device_vendor"] for s in precision_database])) for vendor in device_vendors: vendor_database = [s for s in precision_database if s["device_vendor"] == vendor] + + # Loops over device types (e.g. GPU) device_types = sorted(set([s["device_type"] for s in vendor_database])) for device_type in device_types: type_database = [s for s in vendor_database if s["device_type"] == device_type] diff --git a/src/database/database.cpp b/src/database/database.cpp index 38974b95..34c44a29 100644 --- a/src/database/database.cpp +++ b/src/database/database.cpp @@ -35,9 +35,9 @@ const std::vector Database::database = { XdotHalf, XdotSingle, XdotDouble, XdotComplexSingle, XdotComplexDouble, XgemvHalf, XgemvSingle, XgemvDouble, XgemvComplexSingle, XgemvComplexDouble, XgemvFastHalf, XgemvFastSingle, XgemvFastDouble, XgemvFastComplexSingle, XgemvFastComplexDouble, - /* XgemvFastRotHalf, */ XgemvFastRotSingle, XgemvFastRotDouble, XgemvFastRotComplexSingle, XgemvFastRotComplexDouble, + XgemvFastRotHalf, XgemvFastRotSingle, XgemvFastRotDouble, XgemvFastRotComplexSingle, XgemvFastRotComplexDouble, XgerHalf, XgerSingle, XgerDouble, XgerComplexSingle, XgerComplexDouble, - /* XgemmHalf, */ XgemmSingle, XgemmDouble, XgemmComplexSingle, XgemmComplexDouble, + XgemmHalf, XgemmSingle, XgemmDouble, XgemmComplexSingle, XgemmComplexDouble, CopyHalf, CopySingle, CopyDouble, CopyComplexSingle, CopyComplexDouble, PadHalf, PadSingle, PadDouble, PadComplexSingle, PadComplexDouble, TransposeHalf, TransposeSingle, TransposeDouble, TransposeComplexSingle, TransposeComplexDouble, diff --git a/src/database/database.hpp b/src/database/database.hpp index 8d6d3863..a6ab49c5 100644 --- a/src/database/database.hpp +++ b/src/database/database.hpp @@ -72,9 +72,9 @@ class Database { static const DatabaseEntry XdotHalf, XdotSingle, XdotDouble, XdotComplexSingle, XdotComplexDouble; static const DatabaseEntry XgemvHalf, XgemvSingle, XgemvDouble, XgemvComplexSingle, XgemvComplexDouble; static const DatabaseEntry XgemvFastHalf, XgemvFastSingle, XgemvFastDouble, XgemvFastComplexSingle, XgemvFastComplexDouble; - static const DatabaseEntry /* XgemvFastRotHalf, */ XgemvFastRotSingle, XgemvFastRotDouble, XgemvFastRotComplexSingle, XgemvFastRotComplexDouble; + static const DatabaseEntry XgemvFastRotHalf, XgemvFastRotSingle, XgemvFastRotDouble, XgemvFastRotComplexSingle, XgemvFastRotComplexDouble; static const DatabaseEntry XgerHalf, XgerSingle, XgerDouble, XgerComplexSingle, XgerComplexDouble; - static const DatabaseEntry /* XgemmHalf, */ XgemmSingle, XgemmDouble, XgemmComplexSingle, XgemmComplexDouble; + static const DatabaseEntry XgemmHalf, XgemmSingle, XgemmDouble, XgemmComplexSingle, XgemmComplexDouble; static const DatabaseEntry CopyHalf, CopySingle, CopyDouble, CopyComplexSingle, CopyComplexDouble; static const DatabaseEntry PadHalf, PadSingle, PadDouble, PadComplexSingle, PadComplexDouble; static const DatabaseEntry TransposeHalf, TransposeSingle, TransposeDouble, TransposeComplexSingle, TransposeComplexDouble; diff --git a/src/database/kernels/xgemm.hpp b/src/database/kernels/xgemm.hpp index 7e793076..d19c55b5 100644 --- a/src/database/kernels/xgemm.hpp +++ b/src/database/kernels/xgemm.hpp @@ -14,6 +14,18 @@ namespace clblast { // ================================================================================================= +const Database::DatabaseEntry Database::XgemmHalf = { + "Xgemm", Precision::kHalf, { + { // Default + kDeviceTypeAll, "default", { + { "default", { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",16}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, + } + }, + } +}; + +// ================================================================================================= + const Database::DatabaseEntry Database::XgemmSingle = { "Xgemm", Precision::kSingle, { { // AMD GPUs diff --git a/src/database/kernels/xgemv_fast_rot.hpp b/src/database/kernels/xgemv_fast_rot.hpp index 3d2e0d3a..328094e1 100644 --- a/src/database/kernels/xgemv_fast_rot.hpp +++ b/src/database/kernels/xgemv_fast_rot.hpp @@ -14,6 +14,18 @@ namespace clblast { // ================================================================================================= +const Database::DatabaseEntry Database::XgemvFastRotHalf = { + "XgemvFastRot", Precision::kHalf, { + { // Default + kDeviceTypeAll, "default", { + { "default", { {"VW3",8}, {"WGS3",32}, {"WPT3",32} } }, + } + }, + } +}; + +// ================================================================================================= + const Database::DatabaseEntry Database::XgemvFastRotSingle = { "XgemvFastRot", Precision::kSingle, { { // AMD GPUs -- cgit v1.2.3