summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCedric Nugteren <web@cedricnugteren.nl>2016-09-12 20:13:38 +0200
committerCedric Nugteren <web@cedricnugteren.nl>2016-09-12 20:13:38 +0200
commitaa3dffe356cc3c85e4d49508a4f21f4becba6e8c (patch)
treebe0853996d6d722206d678a86882d21d863cd5ab
parentb5a67f86ecca72b47fc3d0a8231f902752b13c3d (diff)
Added XgemvFastRot and Xgemm 16-bit tuning results: just defaults which are now automatically taken from 32-bit if there are no entries at all
-rw-r--r--scripts/database/database/clblast.py18
-rw-r--r--src/database/database.cpp4
-rw-r--r--src/database/database.hpp4
-rw-r--r--src/database/kernels/xgemm.hpp12
-rw-r--r--src/database/kernels/xgemv_fast_rot.hpp12
5 files changed, 43 insertions, 7 deletions
diff --git a/scripts/database/database/clblast.py b/scripts/database/database/clblast.py
index beed46d9..8190f225 100644
--- a/scripts/database/database/clblast.py
+++ b/scripts/database/database/clblast.py
@@ -82,7 +82,7 @@ def print_cpp_database(database, output_dir):
"""Outputs the database as C++ code"""
# Iterates over the kernel families
- kernel_families = [s["kernel_family"] for s in database["sections"]]
+ kernel_families = sorted(set([s["kernel_family"] for s in database["sections"]]))
for family_name in kernel_families:
family_database = [s for s in database["sections"] if s["kernel_family"] == family_name]
@@ -92,15 +92,27 @@ def print_cpp_database(database, output_dir):
f.write(get_cpp_header(family_name))
# Loops over the different precision (e.g. 16, 32, 3232, 64, 6464)
- precisions = sorted(set([s["precision"] for s in family_database]))
+ precisions = sorted(set([s["precision"] for s in database["sections"]])) # Based on full database
for precision in precisions:
precision_database = [s for s in family_database if s["precision"] == precision]
f.write(get_cpp_precision(family_name, precision))
- # Loops over a combination of device vendors and device types (e.g. AMD GPU)
+ # In case there is nothing found at all (e.g. 16-bit): continue as if this was a precision of 32 but
+ # with the defaults only
+ if len(precision_database) == 0:
+ print("[database] No results found for %s:%s, retrieving defaults from %s:32" %
+ (family_name, precision, family_name))
+ precision_database = [s for s in family_database if s["precision"] == "32"
+ and s["device_vendor"] == VENDOR_DEFAULT
+ and s["device_type"] == DEVICE_TYPE_DEFAULT
+ and s["device"] == DEVICE_NAME_DEFAULT]
+
+ # Loops over device vendors (e.g. AMD)
device_vendors = sorted(set([s["device_vendor"] for s in precision_database]))
for vendor in device_vendors:
vendor_database = [s for s in precision_database if s["device_vendor"] == vendor]
+
+ # Loops over device types (e.g. GPU)
device_types = sorted(set([s["device_type"] for s in vendor_database]))
for device_type in device_types:
type_database = [s for s in vendor_database if s["device_type"] == device_type]
diff --git a/src/database/database.cpp b/src/database/database.cpp
index 38974b95..34c44a29 100644
--- a/src/database/database.cpp
+++ b/src/database/database.cpp
@@ -35,9 +35,9 @@ const std::vector<Database::DatabaseEntry> Database::database = {
XdotHalf, XdotSingle, XdotDouble, XdotComplexSingle, XdotComplexDouble,
XgemvHalf, XgemvSingle, XgemvDouble, XgemvComplexSingle, XgemvComplexDouble,
XgemvFastHalf, XgemvFastSingle, XgemvFastDouble, XgemvFastComplexSingle, XgemvFastComplexDouble,
- /* XgemvFastRotHalf, */ XgemvFastRotSingle, XgemvFastRotDouble, XgemvFastRotComplexSingle, XgemvFastRotComplexDouble,
+ XgemvFastRotHalf, XgemvFastRotSingle, XgemvFastRotDouble, XgemvFastRotComplexSingle, XgemvFastRotComplexDouble,
XgerHalf, XgerSingle, XgerDouble, XgerComplexSingle, XgerComplexDouble,
- /* XgemmHalf, */ XgemmSingle, XgemmDouble, XgemmComplexSingle, XgemmComplexDouble,
+ XgemmHalf, XgemmSingle, XgemmDouble, XgemmComplexSingle, XgemmComplexDouble,
CopyHalf, CopySingle, CopyDouble, CopyComplexSingle, CopyComplexDouble,
PadHalf, PadSingle, PadDouble, PadComplexSingle, PadComplexDouble,
TransposeHalf, TransposeSingle, TransposeDouble, TransposeComplexSingle, TransposeComplexDouble,
diff --git a/src/database/database.hpp b/src/database/database.hpp
index 8d6d3863..a6ab49c5 100644
--- a/src/database/database.hpp
+++ b/src/database/database.hpp
@@ -72,9 +72,9 @@ class Database {
static const DatabaseEntry XdotHalf, XdotSingle, XdotDouble, XdotComplexSingle, XdotComplexDouble;
static const DatabaseEntry XgemvHalf, XgemvSingle, XgemvDouble, XgemvComplexSingle, XgemvComplexDouble;
static const DatabaseEntry XgemvFastHalf, XgemvFastSingle, XgemvFastDouble, XgemvFastComplexSingle, XgemvFastComplexDouble;
- static const DatabaseEntry /* XgemvFastRotHalf, */ XgemvFastRotSingle, XgemvFastRotDouble, XgemvFastRotComplexSingle, XgemvFastRotComplexDouble;
+ static const DatabaseEntry XgemvFastRotHalf, XgemvFastRotSingle, XgemvFastRotDouble, XgemvFastRotComplexSingle, XgemvFastRotComplexDouble;
static const DatabaseEntry XgerHalf, XgerSingle, XgerDouble, XgerComplexSingle, XgerComplexDouble;
- static const DatabaseEntry /* XgemmHalf, */ XgemmSingle, XgemmDouble, XgemmComplexSingle, XgemmComplexDouble;
+ static const DatabaseEntry XgemmHalf, XgemmSingle, XgemmDouble, XgemmComplexSingle, XgemmComplexDouble;
static const DatabaseEntry CopyHalf, CopySingle, CopyDouble, CopyComplexSingle, CopyComplexDouble;
static const DatabaseEntry PadHalf, PadSingle, PadDouble, PadComplexSingle, PadComplexDouble;
static const DatabaseEntry TransposeHalf, TransposeSingle, TransposeDouble, TransposeComplexSingle, TransposeComplexDouble;
diff --git a/src/database/kernels/xgemm.hpp b/src/database/kernels/xgemm.hpp
index 7e793076..d19c55b5 100644
--- a/src/database/kernels/xgemm.hpp
+++ b/src/database/kernels/xgemm.hpp
@@ -14,6 +14,18 @@
namespace clblast {
// =================================================================================================
+const Database::DatabaseEntry Database::XgemmHalf = {
+ "Xgemm", Precision::kHalf, {
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",16}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
+ }
+ },
+ }
+};
+
+// =================================================================================================
+
const Database::DatabaseEntry Database::XgemmSingle = {
"Xgemm", Precision::kSingle, {
{ // AMD GPUs
diff --git a/src/database/kernels/xgemv_fast_rot.hpp b/src/database/kernels/xgemv_fast_rot.hpp
index 3d2e0d3a..328094e1 100644
--- a/src/database/kernels/xgemv_fast_rot.hpp
+++ b/src/database/kernels/xgemv_fast_rot.hpp
@@ -14,6 +14,18 @@
namespace clblast {
// =================================================================================================
+const Database::DatabaseEntry Database::XgemvFastRotHalf = {
+ "XgemvFastRot", Precision::kHalf, {
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { {"VW3",8}, {"WGS3",32}, {"WPT3",32} } },
+ }
+ },
+ }
+};
+
+// =================================================================================================
+
const Database::DatabaseEntry Database::XgemvFastRotSingle = {
"XgemvFastRot", Precision::kSingle, {
{ // AMD GPUs