summary refs log tree commit diff
path: root/scripts
diff options
context:
space:
mode:
author Cedric Nugteren <web@cedricnugteren.nl> 2017-09-16 20:37:09 +0200
committer GitHub <noreply@github.com> 2017-09-16 20:37:09 +0200
commit 7d0ef8e10d05ee3a18360295c021ab6a6ef32c2d (patch)
tree 55d795f06769134601f017f50d505a6c8904d398 /scripts
parent bb947890dec90712c92028c20234eafd48e6fa3e (diff)
parent bcf39eb79a8252b9f9b0c31311c7951abc8520ee (diff)
Merge pull request #191 from CNugteren/database_improvements
Database improvements
Diffstat (limited to 'scripts')
-rwxr-xr-x scripts/database/database.py 29
-rw-r--r-- scripts/database/database/clblast.py 168
-rw-r--r-- scripts/database/database/defaults.py 84
-rw-r--r-- scripts/database/database/io.py 43
4 files changed, 211 insertions, 113 deletions
diff --git a/scripts/database/database.py b/scripts/database/database.py
index e398aa30..8f3ccce6 100755
--- a/scripts/database/database.py
+++ b/scripts/database/database.py
@@ -20,14 +20,6 @@ import database.defaults as defaults
# Server storing a copy of the database
DATABASE_SERVER_URL = "https://raw.githubusercontent.com/CNugteren/CLBlast-database/master/database.json"
-# OpenCL vendor names and their short name
-VENDOR_TRANSLATION_TABLE = {
- "GenuineIntel": "Intel",
- "Intel(R) Corporation": "Intel",
- "Advanced Micro Devices, Inc.": "AMD",
- "NVIDIA Corporation": "NVIDIA",
-}
-
def remove_mismatched_arguments(database):
"""Checks for tuning results with mis-matched entries and removes them according to user preferences"""
@@ -44,12 +36,14 @@ def remove_mismatched_arguments(database):
for kernel_group_name, kernel_group in db.group_by(database["sections"], kernel_attributes):
group_by_arguments = db.group_by(kernel_group, clblast.ARGUMENT_ATTRIBUTES)
if len(group_by_arguments) != 1:
- print("[database] WARNING: entries for a single kernel with multiple argument values " + str(kernel_group_name))
- print("[database] Either quit now, or remove all but one of the argument combinations below:")
+ print("[database] WARNING: entries for a single kernel with multiple argument values " +
+ str(kernel_group_name))
+ print("[database] Either quit or remove all but one of the argument combinations below:")
for index, (attribute_group_name, mismatching_entries) in enumerate(group_by_arguments):
print("[database] %d: %s" % (index, attribute_group_name))
for attribute_group_name, mismatching_entries in group_by_arguments:
- response = user_input("[database] Remove entries corresponding to %s, [y/n]? " % str(attribute_group_name))
+ response = user_input("[database] Remove entries corresponding to %s, [y/n]? " %
+ str(attribute_group_name))
if response == "y":
for entry in mismatching_entries:
database["sections"].remove(entry)
@@ -59,7 +53,8 @@ def remove_mismatched_arguments(database):
for kernel_group_name, kernel_group in db.group_by(database["sections"], kernel_attributes):
group_by_arguments = db.group_by(kernel_group, clblast.ARGUMENT_ATTRIBUTES)
if len(group_by_arguments) != 1:
- print("[database] ERROR: entries for a single kernel with multiple argument values " + str(kernel_group_name))
+ print("[database] ERROR: entries for a single kernel with multiple argument values " +
+ str(kernel_group_name))
assert len(group_by_arguments) == 1
@@ -97,7 +92,8 @@ def main(argv):
# Checks whether the command-line arguments are valid
clblast_header = os.path.join(cl_args.clblast_root, "include", "clblast.h") # Not used but just for validation
if not os.path.isfile(clblast_header):
- raise RuntimeError("The path '" + cl_args.clblast_root + "' does not point to the root of the CLBlast library")
+ raise RuntimeError("The path '" + cl_args.clblast_root +
+ "' does not point to the root of the CLBlast library")
if len(glob.glob(json_files)) < 1:
print("[database] The path '" + cl_args.source_folder + "' does not contain any JSON files")
@@ -115,11 +111,6 @@ def main(argv):
sys.stdout.write("[database] Processing '" + file_json + "' ") # No newline printed
imported_data = io.load_tuning_results(file_json)
- # Fixes the problem that some vendors use multiple different names
- for target in VENDOR_TRANSLATION_TABLE:
- if imported_data["device_vendor"] == target:
- imported_data["device_vendor"] = VENDOR_TRANSLATION_TABLE[target]
-
# Adds the new data to the database
old_size = db.length(database)
database = db.add_section(database, imported_data)
@@ -136,7 +127,7 @@ def main(argv):
# Removes database entries before continuing
if cl_args.remove_device is not None:
print("[database] Removing all results for device '%s'" % cl_args.remove_device)
- remove_database_entries(database, {"device": cl_args.remove_device})
+ remove_database_entries(database, {"clblast_device": cl_args.remove_device})
io.save_database(database, database_filename)
# Retrieves the best performing results
diff --git a/scripts/database/database/clblast.py b/scripts/database/database/clblast.py
index 803d1d2a..428bfdda 100644
--- a/scripts/database/database/clblast.py
+++ b/scripts/database/database/clblast.py
@@ -7,14 +7,21 @@
import os
+# Type settings (also change in database_structure.hpp)
+STRING_LENGTH = 50
+PARAMETERS_LENGTH = 14
+
# Constants from the C++ code
VENDOR_DEFAULT = "default"
DEVICE_TYPE_DEFAULT = "All"
DEVICE_NAME_DEFAULT = "default"
+DEVICE_NAME_DEFAULT_CONSTANT = "kDeviceNameDefault "
+DEVICE_ARCHITECTURE_DEFAULT = "default"
# List of attributes
-DEVICE_TYPE_ATTRIBUTES = ["device_vendor", "device_type"]
-DEVICE_ATTRIBUTES = ["device", "device_core_clock", "device_compute_units"]
+DEVICE_TYPE_ATTRIBUTES = ["clblast_device_vendor", "clblast_device_type"]
+DEVICE_ATTRIBUTES = ["clblast_device_name", "clblast_device_architecture",
+ "device_core_clock", "device_compute_units"]
KERNEL_ATTRIBUTES = ["precision", "kernel_family"]
ARGUMENT_ATTRIBUTES = ["arg_m", "arg_n", "arg_k", "arg_alpha", "arg_beta"]
ATTRIBUTES = DEVICE_ATTRIBUTES + DEVICE_TYPE_ATTRIBUTES + KERNEL_ATTRIBUTES + ARGUMENT_ATTRIBUTES
@@ -42,20 +49,19 @@ def get_cpp_separator():
return "// ================================================================================================="
-def get_cpp_header(family):
+def get_cpp_header(family, precision):
"""Retrieves the C++ header"""
return ("\n" + get_cpp_separator() + """
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-// Database generator <database.py>
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
//
-// This file populates the database with best-found tuning parameters for the '%s' kernels.
+// This file populates the database with best-found tuning parameters for the '%s%s' kernels.
//\n"""
- % family.title() + get_cpp_separator() + \
- "\n\nnamespace clblast {\n" + "namespace database {\n" + get_cpp_separator())
+ % (family.title(), precision)) + get_cpp_separator() + "\n"
+
+
+def get_cpp_header_namespace():
+ return "\nnamespace clblast {\n" + "namespace database {\n"
def get_cpp_footer():
@@ -67,7 +73,7 @@ def get_cpp_precision(family, precision):
"""Retrieves the C++ code for the start of a new precision"""
precision_string = precision_to_string(precision)
camelcase_name = family.title().replace("_", "")
- return("\n\nconst Database::DatabaseEntry %s%s = {\n \"%s\", Precision::k%s"
+ return("\nconst DatabaseEntry %s%s = {\n \"%s\", Precision::k%s"
% (camelcase_name, precision_string, camelcase_name, precision_string))
@@ -79,6 +85,19 @@ def get_cpp_device_vendor(vendor, device_type):
return " { // %s %ss\n kDeviceType%s, \"%s\", {\n" % (vendor, device_type, device_type_caps, vendor)
+def get_cpp_family_includes(family, precisions):
+ result = "\n"
+ # result += "#include \"clblast.h\"\n"
+ # result += "#include \"database/database_structure.hpp\"\n"
+ for precision in precisions:
+ result += "#include \"database/kernels/%s/%s_%s.hpp\"\n" % (family, family, precision)
+ return result
+
+
+def print_as_name(name):
+ return "Name{\"%-50s\"}" % name.strip()[:STRING_LENGTH]
+
+
def print_cpp_database(database, output_dir):
"""Outputs the database as C++ code"""
@@ -87,26 +106,30 @@ def print_cpp_database(database, output_dir):
for family_name in kernel_families:
family_database = [s for s in database["sections"] if s["kernel_family"] == family_name]
- # Opens a new file for each kernel family
- full_path = os.path.join(output_dir, family_name + ".hpp")
- with open(full_path, 'w+') as f:
- f.write(get_cpp_header(family_name))
+ # Goes into a new path for each kernel family
+ family_path = os.path.join(output_dir, family_name)
- # Loops over the different precision (e.g. 16, 32, 3232, 64, 6464)
- precisions = sorted(set([s["precision"] for s in database["sections"]])) # Based on full database
- for precision in precisions:
- precision_database = [s for s in family_database if s["precision"] == precision]
+ # Loops over the different precision (e.g. 16, 32, 3232, 64, 6464)
+ precisions = sorted(set([s["precision"] for s in database["sections"]])) # Based on full database
+ for precision in precisions:
+ precision_database = [s for s in family_database if s["precision"] == precision]
+
+ # Opens a new file for each precision
+ full_path = os.path.join(family_path, family_name + "_" + precision + ".hpp")
+ with open(full_path, 'w+') as f:
+ f.write(get_cpp_header(family_name, precision))
+ f.write(get_cpp_header_namespace())
f.write(get_cpp_precision(family_name, precision))
- # In case there is nothing found at all (e.g. 16-bit): continue as if this was a precision of 32 but
- # with the defaults only
+ # In case there is nothing found at all (e.g. 16-bit): continue as if this was a
+ # precision of 32 but with the defaults only
if len(precision_database) == 0:
print("[database] No results found for %s:%s, retrieving defaults from %s:32" %
(family_name, precision, family_name))
precision_database = [s for s in family_database if s["precision"] == "32"
- and s["device_vendor"] == VENDOR_DEFAULT
- and s["device_type"] == DEVICE_TYPE_DEFAULT
- and s["device"] == DEVICE_NAME_DEFAULT]
+ and s["clblast_device_vendor"] == VENDOR_DEFAULT
+ and s["clblast_device_type"] == DEVICE_TYPE_DEFAULT
+ and s["clblast_device_name"] == DEVICE_NAME_DEFAULT]
# Discovers the parameters for this kernel
parameter_names = []
@@ -118,51 +141,72 @@ def print_cpp_database(database, output_dir):
f.write(", {" + parameter_names_as_string + "}, {\n")
# Loops over device vendors (e.g. AMD)
- device_vendors = sorted(set([s["device_vendor"] for s in precision_database]))
+ device_vendors = sorted(set([s["clblast_device_vendor"] for s in precision_database]))
for vendor in device_vendors:
- vendor_database = [s for s in precision_database if s["device_vendor"] == vendor]
+ vendor_database = [s for s in precision_database if s["clblast_device_vendor"] == vendor]
# Loops over device types (e.g. GPU)
- device_types = sorted(set([s["device_type"] for s in vendor_database]))
+ device_types = sorted(set([s["clblast_device_type"] for s in vendor_database]))
for device_type in device_types:
- type_database = [s for s in vendor_database if s["device_type"] == device_type]
+ type_database = [s for s in vendor_database if s["clblast_device_type"] == device_type]
f.write(get_cpp_device_vendor(vendor, device_type))
- # Loops over every device of this vendor-type combination
- devices = sorted(set([s["device"] for s in type_database]))
- for device_name in devices:
- device_database = [s for s in type_database if s["device"] == device_name]
- device_name_quoted = "\"%s\"," % device_name.strip()
- device_name_cpp = " { %-50s { " % device_name_quoted
- f.write(device_name_cpp)
-
- # Collects the parameters for this entry
- parameters = []
- parmameter_index = 0
- kernels = sorted(set([s["kernel"] for s in device_database]))
- for kernel in kernels:
- kernel_database = [s for s in device_database if s["kernel"] == kernel]
-
- assert len(kernel_database) == 1
- results = kernel_database[0]["results"]
-
- assert len(results) == 1
- new_parameters = results[0]["parameters"]
- for parameter_name in sorted(new_parameters):
- assert parameter_name == parameter_names[parmameter_index]
- parameter_value = new_parameters[parameter_name]
- parameters.append(str(parameter_value))
- parmameter_index += 1
-
- # Prints the entry
- f.write(", ".join(parameters))
- f.write(" } },\n")
+ # Loops over every architecture of this vendor-type combination
+ architectures = sorted(set([s["clblast_device_architecture"] for s in type_database]))
+ for architecture in architectures:
+ architecture_database = [s for s in type_database if s["clblast_device_architecture"] == architecture]
+ architecture_string = DEVICE_ARCHITECTURE_DEFAULT if architecture == "" else architecture
+ f.write(" { \"%s\", {\n" % architecture_string)
+
+ # Loops over every device of this architecture
+ devices = sorted(set([s["clblast_device_name"] for s in architecture_database]))
+ for device_name in devices:
+ device_database = [s for s in architecture_database if s["clblast_device_name"] == device_name]
+ device_name_as_string = print_as_name(device_name) if device_name != DEVICE_NAME_DEFAULT else DEVICE_NAME_DEFAULT_CONSTANT
+ device_name_cpp = " { %s, Params{ " % device_name_as_string
+ f.write(device_name_cpp)
+
+ # Collects the parameters for this entry
+ parameters = []
+ parameter_index = 0
+ kernels = sorted(set([s["kernel"] for s in device_database]))
+ for kernel in kernels:
+ kernel_database = [s for s in device_database if s["kernel"] == kernel]
+
+ assert len(kernel_database) == 1
+ results = kernel_database[0]["results"]
+
+ assert len(results) == 1
+ new_parameters = results[0]["parameters"]
+ for parameter_name in sorted(new_parameters):
+ assert parameter_name == parameter_names[parameter_index]
+ parameter_value = new_parameters[parameter_name]
+ parameters.append(str(parameter_value))
+ parameter_index += 1
+
+ # Appends zeros to complete the list
+ assert parameter_index <= PARAMETERS_LENGTH
+ for append_index in range(parameter_index, PARAMETERS_LENGTH):
+ parameters.append("0")
+
+ # Prints the entry
+ f.write(", ".join(parameters))
+ f.write(" } },\n")
+
+ # Prints the architecture footer
+ f.write(" } },\n")
# Prints the vendor-type combination footer
f.write(" }\n },\n")
# Prints the precision footer
- f.write(" }\n};\n\n" + get_cpp_separator())
+ f.write(" }\n};\n")
+
+ # Prints the file footer
+ f.write(get_cpp_footer())
- # Prints the file footer
- f.write(get_cpp_footer())
+ # Creates the combined family includes header
+ full_path = os.path.join(family_path, family_name + ".hpp")
+ with open(full_path, 'w+') as f:
+ f.write(get_cpp_header(family_name, ""))
+ f.write(get_cpp_family_includes(family_name, precisions))
diff --git a/scripts/database/database/defaults.py b/scripts/database/database/defaults.py
index 444c66df..6042c374 100644
--- a/scripts/database/database/defaults.py
+++ b/scripts/database/database/defaults.py
@@ -12,14 +12,6 @@ import clblast
import bests
-def set_default_device(section):
- """Sets the device name and parameters to some default values"""
- section["device"] = clblast.DEVICE_NAME_DEFAULT
- section["device_compute_units"] = 0
- section["device_core_clock"] = 0
- return section
-
-
def set_identifiers(database, group_by_attributes, identifier_name):
"""Sets a group-identifier based on a given set of attributes. Modifies the database but also returns a list of
unique identifiers."""
@@ -55,32 +47,56 @@ def get_groups_by_identifier(database, group_identifiers, identifier_name):
return groups
-def calculate_defaults(database, verbose):
- """Sets defaults for devices of the same type/vendor"""
+def add_default_sections(database, grouping, verbose, values_dict, condition, enable_warning):
+ default_sections = []
- # Groups the database by kernel, vendor and device type (e.g. AMD GPU)
- group_identifiers = set_identifiers(database, clblast.GROUP_ATTRIBUTES, "group_identifier")
+ # Groups the database by a certain grouping
+ group_identifiers = set_identifiers(database, grouping, "group_identifier")
groups = get_groups_by_identifier(database, group_identifiers, "group_identifier")
# Loops over all groups
- default_sections = {"sections": []}
for group, group_identifier in groups:
# Computes the best parameters
- default_parameters = get_common_best_parameters(group, group_identifier, verbose)
+ default_parameters = get_common_best_parameters(group, group_identifier, verbose, enable_warning)
+ assert len(group) > 0
+ if condition(group[0]):
# Stores all the section's data
- assert len(group) > 0
- default_section = {}
- for attribute in group[0].keys():
- if attribute != "results" and attribute != "group_identifier":
- default_section[attribute] = group[0][attribute]
- default_section = set_default_device(default_section)
- default_section["results"] = [{"time": 0.0, "parameters": default_parameters}]
- default_sections["sections"].append(default_section)
+ default_section = {}
+ for attribute in group[0].keys():
+ if attribute != "results" and attribute != "group_identifier":
+ default_section[attribute] = group[0][attribute]
+ default_section["clblast_device_compute_units"] = 0
+ default_section["clblast_device_core_clock"] = 0
+ for key in values_dict.keys():
+ default_section[key] = values_dict[key]
+ default_section["results"] = [{"time": 0.0, "parameters": default_parameters}]
+ default_sections.append(default_section)
+ return default_sections
- # Groups the database by kernel, vendor and device type (e.g. AMD GPU) - but not by arguments! This is to check for
- # mis-matched arguments.
+
+def calculate_defaults(database, verbose):
+ """Sets defaults for devices of the same type/vendor"""
+ default_sections = {"sections": []}
+
+ # Groups the database by kernel, vendor and device architecture (e.g. AMD GPU "Fiji")
+ architecture_group = clblast.GROUP_ATTRIBUTES + ["clblast_device_architecture"]
+ architecture_defaults = add_default_sections(database, architecture_group, verbose,
+ {"clblast_device_name": clblast.DEVICE_NAME_DEFAULT},
+ lambda entry: True, enable_warning=False)
+
+ # Groups the database by kernel, vendor and device type (e.g. AMD GPU)
+ device_defaults = add_default_sections(database, clblast.GROUP_ATTRIBUTES, verbose,
+ {"clblast_device_name": clblast.DEVICE_NAME_DEFAULT,
+ "clblast_device_architecture": clblast.DEVICE_ARCHITECTURE_DEFAULT},
+ lambda entry: entry["clblast_device_architecture"] != "",
+ enable_warning=True)
+ default_sections["sections"].extend(device_defaults)
+
+ # Groups the database by kernel, vendor and device type (e.g. AMD GPU) - but not by arguments!
+ # This is to check for mis-matched arguments in the database. Note: this is not a check on the
+ # architecture defaults
attributes = clblast.DEVICE_TYPE_ATTRIBUTES + clblast.KERNEL_ATTRIBUTES + ["kernel"]
group_identifiers = set_identifiers(default_sections, attributes, "temp_identifier")
groups = get_groups_by_identifier(default_sections, group_identifiers, "temp_identifier")
@@ -90,6 +106,9 @@ def calculate_defaults(database, verbose):
assert len(group) == 1
remove_identifiers(default_sections, "temp_identifier")
+ # Adds the architecture defaults only after running the above check
+ default_sections["sections"].extend(architecture_defaults)
+
# Groups the database by kernel only
group_identifiers = set_identifiers(database, clblast.KERNEL_ATTRIBUTES + ["kernel"], "group_identifier")
groups = get_groups_by_identifier(database, group_identifiers, "group_identifier")
@@ -98,7 +117,8 @@ def calculate_defaults(database, verbose):
for group, group_identifier in groups:
# Computes the best parameters
- default_parameters = get_common_best_parameters(group, group_identifier, verbose)
+ default_parameters = get_common_best_parameters(group, group_identifier, verbose,
+ enable_warning=True)
# Stores all the section's data
assert len(group) > 0
@@ -106,9 +126,12 @@ def calculate_defaults(database, verbose):
for attribute in group[0].keys():
if attribute != "results" and attribute != "group_identifier":
default_section[attribute] = group[0][attribute]
- default_section = set_default_device(default_section)
- default_section["device_vendor"] = clblast.VENDOR_DEFAULT
- default_section["device_type"] = clblast.DEVICE_TYPE_DEFAULT
+ default_section["clblast_device_name"] = clblast.DEVICE_NAME_DEFAULT
+ default_section["clblast_device_architecture"] = clblast.DEVICE_ARCHITECTURE_DEFAULT
+ default_section["clblast_device_vendor"] = clblast.VENDOR_DEFAULT
+ default_section["clblast_device_type"] = clblast.DEVICE_TYPE_DEFAULT
+ default_section["clblast_device_compute_units"] = 0
+ default_section["clblast_device_core_clock"] = 0
default_section["results"] = [{"time": 0.0, "parameters": default_parameters}]
default_sections["sections"].append(default_section)
@@ -143,7 +166,7 @@ def get_parameter_names(section):
return [result["parameters"] for result in section["results"]]
-def get_common_best_parameters(group, group_identifier, verbose):
+def get_common_best_parameters(group, group_identifier, verbose, enable_warning):
"""Sets defaults based on the best values of entries supported by all devices. This might cause a problem in case
not every device was tuned with the same parameters. In that case it falls back to the above method to retrieve
the smallest best execution time"""
@@ -179,7 +202,8 @@ def get_common_best_parameters(group, group_identifier, verbose):
# Fall back method in case there are no shared entries at all across devices
if num_devices_common == 1:
- print("[database] Warning: No common kernels for: " + str(group_identifier) + " at all")
+ if enable_warning:
+ print("[database] Warning: No common kernels for: " + str(group_identifier) + " at all")
smallest_best_parameters = get_smallest_best_parameters(group)
if verbose:
print("[database] " + str(group_identifier))
diff --git a/scripts/database/database/io.py b/scripts/database/database/io.py
index 58f9103b..722658d8 100644
--- a/scripts/database/database/io.py
+++ b/scripts/database/database/io.py
@@ -26,14 +26,53 @@ def load_database(filename):
"""Loads a database from disk"""
print("[database] Loading database from '" + filename + "'")
with open(filename) as f:
- return json.load(f)
+ database = json.load(f)
+ return decompress_database(database)
def save_database(database, filename):
"""Saves a database to disk"""
+ compressed_db = compress_database(database)
print("[database] Saving database to '" + filename + "'")
with open(filename, "w") as f:
- json.dump(database, f, sort_keys=True, indent=4)
+ json.dump(compressed_db, f, sort_keys=True, indent=2, separators=(',', ': '))
+
+
+def compress_database(database):
+ """Moves certain common fields up in the hierarchy, transforms dicts into lists"""
+ new_sections = []
+ for section in database["sections"]:
+ new_section = {}
+ for field in section:
+ if field == "results":
+ parameter_names = [result["parameters"].keys() for result in section["results"]]
+ assert len(list(set([" ".join(p) for p in parameter_names]))) == 1
+ new_section["parameter_names"] = parameter_names[0] # they are all the same
+ new_results = [[",".join([str(v) for v in result["parameters"].values()]),
+ result["time"]]
+ for result in section["results"]]
+ new_section[field] = new_results
+ else:
+ new_section[field] = section[field]
+ new_sections.append(new_section)
+ return {"sections": new_sections}
+
+
+def decompress_database(database):
+ """Undo the above compression"""
+ for section in database["sections"]:
+ new_results = []
+ for result in section["results"]:
+ parameters = {}
+ for name, value in zip(section["parameter_names"], result[0].split(",")):
+ parameters[name] = value
+ new_result = {
+ "parameters": parameters,
+ "time": result[1]
+ }
+ new_results.append(new_result)
+ section["results"] = new_results
+ return database
def load_tuning_results(filename):