scripts/database/database/defaults.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121

# This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This file follows the
# PEP8 Python style guide and uses a max-width of 120 characters per line.
#
# Author(s):
#   Cedric Nugteren <www.cedricnugteren.nl>

import pandas as pd

import clblast
import bests


def set_default_device(database_entry):
    """Overwrites the device name, compute-unit count and core clock of an entry with their default values and
    returns that same entry"""
    default_fields = (
        ("device", clblast.DEVICE_NAME_DEFAULT),
        ("device_compute_units", 0),
        ("device_core_clock", 0),
    )
    for field_name, field_value in default_fields:
        database_entry[field_name] = field_value
    return database_entry


def set_default_time(database_entry):
    """Overwrites the "time" field of an entry with 0.0 and returns that same entry"""
    default_time = 0.0
    database_entry["time"] = default_time
    return database_entry


def calculate_defaults(database, verbose, calculate_common_best=True):
    """Sets defaults for devices of the same type/vendor. An option determines how to compute the defaults.

    Two kinds of default entries are produced. The first kind is computed per combination of device type/vendor
    attributes, kernel attributes, kernel and argument attributes: with 'calculate_common_best' set this uses
    'get_common_best' (which receives 'verbose'), otherwise 'get_smallest_best'. The second kind is computed per
    kernel and arguments over all device types and vendors and always uses 'get_smallest_best'. Every entry gets the
    default device fields and the default time. Returns a DataFrame which holds these default entries only."""

    def to_rows(default_values):
        """Wraps a single Series entry into a one-row DataFrame; any other entry is passed through unchanged"""
        if isinstance(default_values, pd.Series):
            return pd.DataFrame([default_values])
        return default_values

    # Defaults per combination of device vendors and device types (e.g. AMD GPU)
    type_vendor_rows = []
    database_type_vendor = database.groupby(clblast.DEVICE_TYPE_ATTRIBUTES + clblast.KERNEL_ATTRIBUTES + ["kernel"] +
                                            clblast.ARGUMENT_ATTRIBUTES)
    for group_name, database_group in database_type_vendor:
        if calculate_common_best:
            default_values = get_common_best(database_group, group_name, verbose)
        else:
            default_values = get_smallest_best(database_group)
        default_values = set_default_device(default_values)
        default_values = set_default_time(default_values)
        type_vendor_rows.append(to_rows(default_values))

    # 'DataFrame.append' is no longer available as of pandas 2.0 (and copied the whole frame on every call): the
    # entries are gathered in a list and concatenated in one go instead
    database_defaults = pd.concat(type_vendor_rows, ignore_index=True) if type_vendor_rows else pd.DataFrame()

    # Checks for mis-matched arguments
    groups = database_defaults.groupby(clblast.DEVICE_TYPE_ATTRIBUTES + clblast.KERNEL_ATTRIBUTES + ["kernel"])
    for group_name, database_group in groups:
        if len(database_group) != 1:
            description = database_group["kernel"].min() + " " + database_group["device_vendor"].min()
            print("[WARNING] Entries for a single kernel with multiple argument values: " + description)

    # Defaults over all device types and vendors, placed after the per type/vendor defaults
    all_rows = [database_defaults]
    groups = database.groupby(clblast.KERNEL_ATTRIBUTES + ["kernel"] + clblast.ARGUMENT_ATTRIBUTES)
    for group_name, database_group in groups:
        default_values = get_smallest_best(database_group)
        default_values["device_vendor"] = clblast.VENDOR_DEFAULT
        default_values["device_type"] = clblast.DEVICE_TYPE_DEFAULT
        default_values = set_default_device(default_values)
        default_values = set_default_time(default_values)
        all_rows.append(to_rows(default_values))

    # Database with both types of defaults only
    return pd.concat(all_rows, ignore_index=True)


def get_smallest_best(database):
    """Computes defaults as the minimum, taken along axis 0, over the best results of all known entries. Averaging
    might perform better, but some parameter values might not be supported on other devices."""
    return bests.get_best_results(database).min(axis=0)


def get_common_best(database, group_name, verbose):
    """Picks as defaults the parameter combination with the highest summed relative performance among the
    combinations whose number of entries equals the number of devices in this group. If there is no such combination
    (e.g. not every device was tuned with the same parameters), the result of 'get_smallest_best' is returned
    instead. The 'group_name' is only used for the report printed when 'verbose' is set."""

    # The number of distinct devices within this group
    num_devices = len(database.groupby(clblast.DEVICE_ATTRIBUTES))

    # Drops the columns which hold no values at all and moves the index into a regular column
    entries = database.dropna(axis=1, how='all').reset_index()

    # Adds the performance of each entry relative to the fastest entry sharing its attributes and kernel
    def add_relative_performance(group):
        group["relative_performance"] = group["time"].min() / group["time"]
        return group
    entries = entries.groupby(clblast.ATTRIBUTES + ["kernel"]).apply(add_relative_performance)

    # The tuning parameters of this kernel are the columns with "parameters." in their name
    parameter_names = [name for name in list(entries.columns.values) if "parameters." in name]

    # Keeps only the parameter combinations which have as many entries as there are devices
    shared_entries = entries.groupby(parameter_names).filter(lambda group: len(group) == num_devices)

    # Without a single shared combination, resorts to the smallest of the best results
    if not len(shared_entries):
        return get_smallest_best(entries)

    # Adds the relative performance summed over all entries of the same parameter combination
    def add_group_performance(group):
        group["group_performance"] = group["relative_performance"].sum()
        return group
    shared_entries = shared_entries.groupby(parameter_names).apply(add_group_performance)

    # Selects the entries with the highest summed performance and keeps one example of them (the parameters are
    # the same anyway)
    best_performance = shared_entries["group_performance"].max()
    is_best = shared_entries["group_performance"] == best_performance
    best_entries = shared_entries[is_best].drop_duplicates(["group_performance"])

    # Reports the outcome on request and returns the single remaining entry
    if verbose:
        report = ("[database] " + str(group_name) + " with performance " + str(best_performance) +
                  " with devices: " + str(num_devices) + " " + str(best_entries.shape))
        print(report)
    assert len(best_entries) == 1
    return best_entries