From 0d13d814c21f1fdb3b0781b84bf12914aec5225a Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Thu, 14 Sep 2017 21:27:33 +0200 Subject: Added architecture layer in the tuning database for better performance on unseen devices --- scripts/database/database/clblast.py | 69 +++++++++++++++++------------- scripts/database/database/defaults.py | 80 +++++++++++++++++++++++------------ 2 files changed, 92 insertions(+), 57 deletions(-) (limited to 'scripts') diff --git a/scripts/database/database/clblast.py b/scripts/database/database/clblast.py index 9ce502ee..d0245dec 100644 --- a/scripts/database/database/clblast.py +++ b/scripts/database/database/clblast.py @@ -11,6 +11,7 @@ import os VENDOR_DEFAULT = "default" DEVICE_TYPE_DEFAULT = "All" DEVICE_NAME_DEFAULT = "default" +DEVICE_ARCHITECTURE_DEFAULT = "default" # List of attributes DEVICE_TYPE_ATTRIBUTES = ["clblast_device_vendor", "clblast_device_type"] @@ -141,35 +142,45 @@ def print_cpp_database(database, output_dir): type_database = [s for s in vendor_database if s["clblast_device_type"] == device_type] f.write(get_cpp_device_vendor(vendor, device_type)) - # Loops over every device of this vendor-type combination - devices = sorted(set([s["clblast_device_name"] for s in type_database])) - for device_name in devices: - device_database = [s for s in type_database if s["clblast_device_name"] == device_name] - device_name_quoted = "\"%s\"," % device_name.strip() - device_name_cpp = " { %-50s { " % device_name_quoted - f.write(device_name_cpp) - - # Collects the parameters for this entry - parameters = [] - parameter_index = 0 - kernels = sorted(set([s["kernel"] for s in device_database])) - for kernel in kernels: - kernel_database = [s for s in device_database if s["kernel"] == kernel] - - assert len(kernel_database) == 1 - results = kernel_database[0]["results"] - - assert len(results) == 1 - new_parameters = results[0]["parameters"] - for parameter_name in sorted(new_parameters): - assert parameter_name == 
parameter_names[parameter_index] - parameter_value = new_parameters[parameter_name] - parameters.append(str(parameter_value)) - parameter_index += 1 - - # Prints the entry - f.write(", ".join(parameters)) - f.write(" } },\n") + # Loops over every architecture of this vendor-type combination + architectures = sorted(set([s["clblast_device_architecture"] for s in type_database])) + for architecture in architectures: + architecture_database = [s for s in type_database if s["clblast_device_architecture"] == architecture] + architecture_string = DEVICE_ARCHITECTURE_DEFAULT if architecture == "" else architecture + f.write(" { \"%s\", {\n" % architecture_string) + + # Loops over every device of this vendor-type-architecture combination + devices = sorted(set([s["clblast_device_name"] for s in architecture_database])) + for device_name in devices: + device_database = [s for s in architecture_database if s["clblast_device_name"] == device_name] + device_name_quoted = "\"%s\"," % device_name.strip() + device_name_cpp = " { %-50s { " % device_name_quoted + f.write(device_name_cpp) + + # Collects the parameters for this entry + parameters = [] + parameter_index = 0 + kernels = sorted(set([s["kernel"] for s in device_database])) + for kernel in kernels: + kernel_database = [s for s in device_database if s["kernel"] == kernel] + + assert len(kernel_database) == 1 + results = kernel_database[0]["results"] + + assert len(results) == 1 + new_parameters = results[0]["parameters"] + for parameter_name in sorted(new_parameters): + assert parameter_name == parameter_names[parameter_index] + parameter_value = new_parameters[parameter_name] + parameters.append(str(parameter_value)) + parameter_index += 1 + + # Prints the entry + f.write(", ".join(parameters)) + f.write(" } },\n") + + # Prints the architecture footer + f.write(" } },\n") # Prints the vendor-type combination footer f.write(" }\n },\n") diff --git a/scripts/database/database/defaults.py b/scripts/database/database/defaults.py index 
3d11de34..6042c374 100644 --- a/scripts/database/database/defaults.py +++ b/scripts/database/database/defaults.py @@ -12,14 +12,6 @@ import clblast import bests -def set_default_device(section): - """Sets the device name and parameters to some default values""" - section["clblast_device_name"] = clblast.DEVICE_NAME_DEFAULT - section["clblast_device_compute_units"] = 0 - section["clblast_device_core_clock"] = 0 - return section - - def set_identifiers(database, group_by_attributes, identifier_name): """Sets a group-identifier based on a given set of attributes. Modifies the database but also returns a list of unique identifiers.""" @@ -55,32 +47,56 @@ def get_groups_by_identifier(database, group_identifiers, identifier_name): return groups -def calculate_defaults(database, verbose): - """Sets defaults for devices of the same type/vendor""" +def add_default_sections(database, grouping, verbose, values_dict, condition, enable_warning): + default_sections = [] - # Groups the database by kernel, vendor and device type (e.g. 
AMD GPU) - group_identifiers = set_identifiers(database, clblast.GROUP_ATTRIBUTES, "group_identifier") + # Groups the database by a certain grouping + group_identifiers = set_identifiers(database, grouping, "group_identifier") groups = get_groups_by_identifier(database, group_identifiers, "group_identifier") # Loops over all groups - default_sections = {"sections": []} for group, group_identifier in groups: # Computes the best parameters - default_parameters = get_common_best_parameters(group, group_identifier, verbose) + default_parameters = get_common_best_parameters(group, group_identifier, verbose, enable_warning) + assert len(group) > 0 + if condition(group[0]): # Stores all the section's data - assert len(group) > 0 - default_section = {} - for attribute in group[0].keys(): - if attribute != "results" and attribute != "group_identifier": - default_section[attribute] = group[0][attribute] - default_section = set_default_device(default_section) - default_section["results"] = [{"time": 0.0, "parameters": default_parameters}] - default_sections["sections"].append(default_section) + default_section = {} + for attribute in group[0].keys(): + if attribute != "results" and attribute != "group_identifier": + default_section[attribute] = group[0][attribute] + default_section["clblast_device_compute_units"] = 0 + default_section["clblast_device_core_clock"] = 0 + for key in values_dict.keys(): + default_section[key] = values_dict[key] + default_section["results"] = [{"time": 0.0, "parameters": default_parameters}] + default_sections.append(default_section) + return default_sections - # Groups the database by kernel, vendor and device type (e.g. AMD GPU) - but not by arguments! This is to check for - # mis-matched arguments. + +def calculate_defaults(database, verbose): + """Sets defaults for devices of the same type/vendor""" + default_sections = {"sections": []} + + # Groups the database by kernel, vendor and device architecture (e.g. 
AMD GPU "Fiji") + architecture_group = clblast.GROUP_ATTRIBUTES + ["clblast_device_architecture"] + architecture_defaults = add_default_sections(database, architecture_group, verbose, + {"clblast_device_name": clblast.DEVICE_NAME_DEFAULT}, + lambda entry: True, enable_warning=False) + + # Groups the database by kernel, vendor and device type (e.g. AMD GPU) + device_defaults = add_default_sections(database, clblast.GROUP_ATTRIBUTES, verbose, + {"clblast_device_name": clblast.DEVICE_NAME_DEFAULT, + "clblast_device_architecture": clblast.DEVICE_ARCHITECTURE_DEFAULT}, + lambda entry: entry["clblast_device_architecture"] != "", + enable_warning=True) + default_sections["sections"].extend(device_defaults) + + # Groups the database by kernel, vendor and device type (e.g. AMD GPU) - but not by arguments! + # This is to check for mis-matched arguments in the database. Note: this is not a check on the + # architecture defaults attributes = clblast.DEVICE_TYPE_ATTRIBUTES + clblast.KERNEL_ATTRIBUTES + ["kernel"] group_identifiers = set_identifiers(default_sections, attributes, "temp_identifier") groups = get_groups_by_identifier(default_sections, group_identifiers, "temp_identifier") @@ -90,6 +106,9 @@ def calculate_defaults(database, verbose): assert len(group) == 1 remove_identifiers(default_sections, "temp_identifier") + # Adds the architecture defaults only after running the above check + default_sections["sections"].extend(architecture_defaults) + # Groups the database by kernel only group_identifiers = set_identifiers(database, clblast.KERNEL_ATTRIBUTES + ["kernel"], "group_identifier") groups = get_groups_by_identifier(database, group_identifiers, "group_identifier") @@ -98,7 +117,8 @@ def calculate_defaults(database, verbose): for group, group_identifier in groups: # Computes the best parameters - default_parameters = get_common_best_parameters(group, group_identifier, verbose) + default_parameters = get_common_best_parameters(group, group_identifier, verbose, + 
enable_warning=True) # Stores all the section's data assert len(group) > 0 @@ -106,9 +126,12 @@ def calculate_defaults(database, verbose): for attribute in group[0].keys(): if attribute != "results" and attribute != "group_identifier": default_section[attribute] = group[0][attribute] - default_section = set_default_device(default_section) + default_section["clblast_device_name"] = clblast.DEVICE_NAME_DEFAULT + default_section["clblast_device_architecture"] = clblast.DEVICE_ARCHITECTURE_DEFAULT default_section["clblast_device_vendor"] = clblast.VENDOR_DEFAULT default_section["clblast_device_type"] = clblast.DEVICE_TYPE_DEFAULT + default_section["clblast_device_compute_units"] = 0 + default_section["clblast_device_core_clock"] = 0 default_section["results"] = [{"time": 0.0, "parameters": default_parameters}] default_sections["sections"].append(default_section) @@ -143,7 +166,7 @@ def get_parameter_names(section): return [result["parameters"] for result in section["results"]] -def get_common_best_parameters(group, group_identifier, verbose): +def get_common_best_parameters(group, group_identifier, verbose, enable_warning): """Sets defaults based on the best values of entries supported by all devices. This might cause a problem in case not every device was tuned with the same parameters. In that case it falls back to the above method to retrieve the smallest best execution time""" @@ -179,7 +202,8 @@ def get_common_best_parameters(group, group_identifier, verbose): # Fall back method in case there are no shared entries at all across devices if num_devices_common == 1: - print("[database] Warning: No common kernels for: " + str(group_identifier) + " at all") + if enable_warning: + print("[database] Warning: No common kernels for: " + str(group_identifier) + " at all") smallest_best_parameters = get_smallest_best_parameters(group) if verbose: print("[database] " + str(group_identifier)) -- cgit v1.2.3