From 3f5401d4c8947945c4770fb1dfd354892702195f Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sun, 7 Aug 2016 16:25:38 +0200 Subject: Added a first version of the database's common-best default calculation --- scripts/database/database.py | 3 +- scripts/database/database/defaults.py | 70 +++++++++++++++++++++++++++++++---- 2 files changed, 64 insertions(+), 9 deletions(-) diff --git a/scripts/database/database.py b/scripts/database/database.py index e115d68c..0ce89d03 100755 --- a/scripts/database/database.py +++ b/scripts/database/database.py @@ -98,7 +98,8 @@ def main(argv): database_best_results = bests.get_best_results(database) # Determines the defaults for other vendors and per vendor - database_defaults = defaults.calculate_defaults(database_best_results) + print("[database] Calculating the default values...") + database_defaults = defaults.calculate_defaults(database) database_best_results = db.concatenate_database(database_best_results, database_defaults) # Outputs the database as a C++ database diff --git a/scripts/database/database/defaults.py b/scripts/database/database/defaults.py index 357c3a3a..fca793ea 100644 --- a/scripts/database/database/defaults.py +++ b/scripts/database/database/defaults.py @@ -6,7 +6,9 @@ # Cedric Nugteren import pandas as pd + import clblast +import bests def set_default_device(database_entry): @@ -23,16 +25,18 @@ def set_default_time(database_entry): return database_entry -def calculate_defaults(df): - """# Sets defaults for devices of the same type/vendor based on the smallest values of all known entries. The average - might be better for performance but some parameters might not be supported on other devices.""" +def calculate_defaults(database, calculate_common_best=True): + """Sets defaults for devices of the same type/vendor. An option determines how to compute the defaults.""" database_defaults = pd.DataFrame() # Defaults per combination of device vendors and device types (e.g. AMD GPU) - database_type_vendor = df.groupby(clblast.DEVICE_TYPE_ATTRIBUTES + clblast.KERNEL_ATTRIBUTES + ["kernel"] + - clblast.ARGUMENT_ATTRIBUTES) + database_type_vendor = database.groupby(clblast.DEVICE_TYPE_ATTRIBUTES + clblast.KERNEL_ATTRIBUTES + ["kernel"] + + clblast.ARGUMENT_ATTRIBUTES) for group_name, database_group in database_type_vendor: - default_values = database_group.min(axis=0) + if calculate_common_best: + default_values = get_common_best(database_group, group_name) + else: + default_values = get_smallest_best(database_group) default_values = set_default_device(default_values) default_values = set_default_time(default_values) database_defaults = database_defaults.append(default_values, ignore_index=True) @@ -45,9 +49,9 @@ def calculate_defaults(df): print("[WARNING] Entries for a single kernel with multiple argument values: " + description) # Defaults over all device types and vendors - groups = df.groupby(clblast.KERNEL_ATTRIBUTES + ["kernel"] + clblast.ARGUMENT_ATTRIBUTES) + groups = database.groupby(clblast.KERNEL_ATTRIBUTES + ["kernel"] + clblast.ARGUMENT_ATTRIBUTES) for group_name, database_group in groups: - default_values = database_group.min(axis=0) + default_values = get_smallest_best(database_group) default_values["device_vendor"] = clblast.VENDOR_DEFAULT default_values["device_type"] = clblast.DEVICE_TYPE_DEFAULT default_values = set_default_device(default_values) @@ -56,3 +60,53 @@ def calculate_defaults(df): # Database with both types of defaults only return database_defaults + + +def get_smallest_best(database): + """Sets defaults based on the smallest values of all known entries. The average might be better for performance but + some parameters might not be supported on other devices.""" + database_best_results = bests.get_best_results(database) + return database_best_results.min(axis=0) + + +def get_common_best(database, group_name): + """Sets defaults based on the best values of entries supported by all devices. This might cause a problem in case + not every device was tuned with the same parameters.""" + # TODO: Quite a bit slower than the above `get_smallest_best` method + + # Counts the number of devices in this group + num_devices = len(database.groupby(clblast.DEVICE_ATTRIBUTES)) + + # Removes columns without any values + database = database.dropna(axis=1, how='all') + + # Retrieves the parameter names for this kernel + all_column_names = list(database.columns.values) + parameter_column_names = [c for c in all_column_names if "parameters." in c] + + # Removes entries which are not available for all devices + database_common = pd.DataFrame() + database_by_parameters = database.groupby(parameter_column_names) + for parameter_values, database_parameters in database_by_parameters: + num_entries = database_parameters.shape[0] + if num_entries == num_devices: + database_common = database_common.append(database_parameters) + + # Fall back to another method in case there are no shared entries at all across devices + if database_common.shape[0] == 0: + # print("Skipping: " + str(group_name) + " with devices: " + str(num_devices) + " " + str(database.shape[0])) + return get_smallest_best(database) + + # Computes the sum of the execution times over the different devices + database_common['time'] = database_common.groupby(parameter_column_names)['time'].transform(sum) + + # Retrieves the entries with the best execution time + best_time = database_common["time"].min() + database_bests = database_common[database_common["time"] == best_time] + + # Retrieves one example only (the parameters are the same anyway) + database_bests = database_bests.drop_duplicates(["time"]) + # print(str(group_name) + " with num devices: " + str(num_devices) + " " + str(database_bests.shape)) + assert database_bests.shape[0] == 1 + + return database_bests -- cgit v1.2.3 From 7da6492b36cae7ba8859cd6d6ab3250e11f9a2b8 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Tue, 9 Aug 2016 21:06:04 +0200 Subject: Improved the speed of the new common-best defaults method for the database generation --- scripts/database/database/defaults.py | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/scripts/database/database/defaults.py b/scripts/database/database/defaults.py index fca793ea..48693247 100644 --- a/scripts/database/database/defaults.py +++ b/scripts/database/database/defaults.py @@ -85,28 +85,26 @@ def get_common_best(database, group_name): parameter_column_names = [c for c in all_column_names if "parameters." in c] # Removes entries which are not available for all devices - database_common = pd.DataFrame() database_by_parameters = database.groupby(parameter_column_names) - for parameter_values, database_parameters in database_by_parameters: - num_entries = database_parameters.shape[0] - if num_entries == num_devices: - database_common = database_common.append(database_parameters) + database_common = database_by_parameters.filter(lambda x: len(x) == num_devices) # Fall back to another method in case there are no shared entries at all across devices - if database_common.shape[0] == 0: - # print("Skipping: " + str(group_name) + " with devices: " + str(num_devices) + " " + str(database.shape[0])) + if len(database_common) == 0: + # print("[database] Skipping: " + str(group_name) + " with devices: %d %d " % (num_devices, len(database))) return get_smallest_best(database) # Computes the sum of the execution times over the different devices - database_common['time'] = database_common.groupby(parameter_column_names)['time'].transform(sum) + database_common_by_parameters = database_common.groupby(parameter_column_names) + group_times = database_common_by_parameters['time'].transform(sum) + database_common.loc[:, 'group_time'] = group_times # Retrieves the entries with the best execution time - best_time = database_common["time"].min() - database_bests = database_common[database_common["time"] == best_time] + best_time = database_common["group_time"].min() + database_bests = database_common[database_common["group_time"] == best_time] # Retrieves one example only (the parameters are the same anyway) - database_bests = database_bests.drop_duplicates(["time"]) - # print(str(group_name) + " with num devices: " + str(num_devices) + " " + str(database_bests.shape)) - assert database_bests.shape[0] == 1 + database_bests = database_bests.drop_duplicates(["group_time"]) + # print("[database] " + str(group_name) + " with devices: " + str(num_devices) + " " + str(database_bests.shape)) + assert len(database_bests) == 1 return database_bests -- cgit v1.2.3 From 7d5631b7e4bb011725b512b55fefa1fa8165a8dd Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Mon, 15 Aug 2016 21:01:07 +0200 Subject: Updated the database script to calculate the relative best performance of tuning results common for a device/vendor type --- scripts/database/database.py | 2 +- scripts/database/database/defaults.py | 39 +++++++++++++++++++++------------ src/database/kernels/copy.hpp | 22 +++++++++---------- src/database/kernels/pad.hpp | 28 +++++++++++------------ src/database/kernels/padtranspose.hpp | 22 +++++++++---------- src/database/kernels/transpose.hpp | 26 +++++++++++----------- src/database/kernels/xaxpy.hpp | 22 +++++++++---------- src/database/kernels/xdot.hpp | 18 +++++++-------- src/database/kernels/xgemv.hpp | 14 ++++++------ src/database/kernels/xgemv_fast.hpp | 10 ++++----- src/database/kernels/xgemv_fast_rot.hpp | 4 ++-- src/database/kernels/xger.hpp | 28 +++++++++++------------ 12 files changed, 123 insertions(+), 112 deletions(-) diff --git a/scripts/database/database.py b/scripts/database/database.py index 0ce89d03..6d370d99 100755 --- a/scripts/database/database.py +++ b/scripts/database/database.py @@ -99,7 +99,7 @@ def main(argv): # Determines the defaults for other vendors and per vendor print("[database] Calculating the default values...") - database_defaults = defaults.calculate_defaults(database) + database_defaults = defaults.calculate_defaults(database, cl_args.verbose) database_best_results = db.concatenate_database(database_best_results, database_defaults) # Outputs the database as a C++ database diff --git a/scripts/database/database/defaults.py b/scripts/database/database/defaults.py index 48693247..985f24bd 100644 --- a/scripts/database/database/defaults.py +++ b/scripts/database/database/defaults.py @@ -25,7 +25,7 @@ def set_default_time(database_entry): return database_entry -def calculate_defaults(database, calculate_common_best=True): +def calculate_defaults(database, verbose, calculate_common_best=True): """Sets defaults for devices of the same type/vendor. An option determines how to compute the defaults.""" database_defaults = pd.DataFrame() @@ -34,7 +34,7 @@ def calculate_defaults(database, calculate_common_best=True): clblast.ARGUMENT_ATTRIBUTES) for group_name, database_group in database_type_vendor: if calculate_common_best: - default_values = get_common_best(database_group, group_name) + default_values = get_common_best(database_group, group_name, verbose) else: default_values = get_smallest_best(database_group) default_values = set_default_device(default_values) @@ -69,16 +69,23 @@ def get_smallest_best(database): return database_best_results.min(axis=0) -def get_common_best(database, group_name): +def get_common_best(database, group_name, verbose): """Sets defaults based on the best values of entries supported by all devices. This might cause a problem in case - not every device was tuned with the same parameters.""" - # TODO: Quite a bit slower than the above `get_smallest_best` method + not every device was tuned with the same parameters. In that case it falls back to the above method to retrieve + the smallest best execution time""" # Counts the number of devices in this group num_devices = len(database.groupby(clblast.DEVICE_ATTRIBUTES)) # Removes columns without any values database = database.dropna(axis=1, how='all') + database = database.reset_index() + + # Inserts the relative execution times into the database + def relative_performance(x): + x["relative_performance"] = x["time"].min() / x["time"] + return x + database = database.groupby(clblast.ATTRIBUTES + ["kernel"]).apply(relative_performance) # Retrieves the parameter names for this kernel all_column_names = list(database.columns.values) @@ -94,17 +101,21 @@ def get_common_best(database, group_name): return get_smallest_best(database) # Computes the sum of the execution times over the different devices - database_common_by_parameters = database_common.groupby(parameter_column_names) - group_times = database_common_by_parameters['time'].transform(sum) - database_common.loc[:, 'group_time'] = group_times + def sum_performance(x): + x["group_performance"] = x["relative_performance"].sum() + return x + database_common = database_common.groupby(parameter_column_names).apply(sum_performance) - # Retrieves the entries with the best execution time - best_time = database_common["group_time"].min() - database_bests = database_common[database_common["group_time"] == best_time] + # Retrieves the entries with the highest performance + best_performance = database_common["group_performance"].max() + database_bests = database_common[database_common["group_performance"] == best_performance] # Retrieves one example only (the parameters are the same anyway) - database_bests = database_bests.drop_duplicates(["group_time"]) - # print("[database] " + str(group_name) + " with devices: " + str(num_devices) + " " + str(database_bests.shape)) - assert len(database_bests) == 1 + database_bests = database_bests.drop_duplicates(["group_performance"]) + # Completed, report and return the results + if verbose: + print("[database] " + str(group_name) + " with performance " + str(best_performance) + " with devices: " + + str(num_devices) + " " + str(database_bests.shape)) + assert len(database_bests) == 1 return database_bests diff --git a/src/database/kernels/copy.hpp b/src/database/kernels/copy.hpp index d592f110..e9902293 100644 --- a/src/database/kernels/copy.hpp +++ b/src/database/kernels/copy.hpp @@ -41,7 +41,7 @@ const Database::DatabaseEntry Database::CopySingle = { { "Oland", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",2} } }, { "Pitcairn", { {"COPY_DIMX",8}, {"COPY_DIMY",16}, {"COPY_VW",4}, {"COPY_WPT",1} } }, { "Tahiti", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",2} } }, - { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } }, + { "default", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",2} } }, } }, { // ARM GPUs @@ -65,7 +65,7 @@ const Database::DatabaseEntry Database::CopySingle = { { "Intel(R) HD Graphics Skylake ULT GT2", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } }, { "Iris", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } }, { "Iris Pro", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",4} } }, - { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, + { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } }, } }, { // Intel accelerators @@ -88,7 +88,7 @@ const Database::DatabaseEntry Database::CopySingle = { { "GeForce GTX TITAN X", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } }, { "Tesla K20m", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",4} } }, { "Tesla K40m", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",2} } }, - { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, + { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",16}, {"COPY_VW",4}, {"COPY_WPT",1} } }, } }, { // Default @@ -110,7 +110,7 @@ const Database::DatabaseEntry Database::CopyComplexSingle = { { "Oland", { {"COPY_DIMX",8}, {"COPY_DIMY",16}, {"COPY_VW",1}, {"COPY_WPT",1} } }, { "Pitcairn", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } }, { "Tahiti", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",2} } }, - { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, + { "default", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, } }, { // Intel CPUs @@ -118,7 +118,7 @@ const Database::DatabaseEntry Database::CopyComplexSingle = { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"COPY_DIMX",16}, {"COPY_DIMY",16}, {"COPY_VW",8}, {"COPY_WPT",1} } }, { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",2} } }, { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",8}, {"COPY_WPT",1} } }, - { "default", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } }, + { "default", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",8}, {"COPY_WPT",1} } }, } }, { // Intel GPUs @@ -149,7 +149,7 @@ const Database::DatabaseEntry Database::CopyComplexSingle = { { "GeForce GTX TITAN X", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, { "Tesla K20m", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",4} } }, { "Tesla K40m", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, - { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, + { "default", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, } }, { // Default @@ -171,7 +171,7 @@ const Database::DatabaseEntry Database::CopyDouble = { { "Oland", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",8} } }, { "Pitcairn", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, { "Tahiti", { {"COPY_DIMX",8}, {"COPY_DIMY",32}, {"COPY_VW",2}, {"COPY_WPT",1} } }, - { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, + { "default", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } }, } }, { // ARM GPUs @@ -185,7 +185,7 @@ const Database::DatabaseEntry Database::CopyDouble = { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",8}, {"COPY_WPT",1} } }, { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"COPY_DIMX",16}, {"COPY_DIMY",32}, {"COPY_VW",2}, {"COPY_WPT",1} } }, { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"COPY_DIMX",16}, {"COPY_DIMY",16}, {"COPY_VW",8}, {"COPY_WPT",1} } }, - { "default", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } }, + { "default", { {"COPY_DIMX",16}, {"COPY_DIMY",16}, {"COPY_VW",8}, {"COPY_WPT",1} } }, } }, { // Intel accelerators @@ -208,7 +208,7 @@ const Database::DatabaseEntry Database::CopyDouble = { { "GeForce GTX TITAN X", { {"COPY_DIMX",32}, {"COPY_DIMY",16}, {"COPY_VW",1}, {"COPY_WPT",1} } }, { "Tesla K20m", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } }, { "Tesla K40m", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",2} } }, - { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, + { "default", { {"COPY_DIMX",32}, {"COPY_DIMY",32}, {"COPY_VW",2}, {"COPY_WPT",1} } }, } }, { // Default @@ -230,7 +230,7 @@ const Database::DatabaseEntry Database::CopyComplexDouble = { { "Oland", { {"COPY_DIMX",8}, {"COPY_DIMY",16}, {"COPY_VW",1}, {"COPY_WPT",1} } }, { "Pitcairn", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, { "Tahiti", { {"COPY_DIMX",8}, {"COPY_DIMY",16}, {"COPY_VW",1}, {"COPY_WPT",1} } }, - { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, + { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",16}, {"COPY_VW",1}, {"COPY_WPT",1} } }, } }, { // ARM GPUs @@ -244,7 +244,7 @@ const Database::DatabaseEntry Database::CopyComplexDouble = { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",8}, {"COPY_WPT",1} } }, { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"COPY_DIMX",32}, {"COPY_DIMY",32}, {"COPY_VW",8}, {"COPY_WPT",1} } }, { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",8}, {"COPY_WPT",1} } }, - { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",8}, {"COPY_WPT",1} } }, + { "default", { {"COPY_DIMX",32}, {"COPY_DIMY",32}, {"COPY_VW",8}, {"COPY_WPT",1} } }, } }, { // Intel accelerators diff --git a/src/database/kernels/pad.hpp b/src/database/kernels/pad.hpp index cd034f15..a242a827 100644 --- a/src/database/kernels/pad.hpp +++ b/src/database/kernels/pad.hpp @@ -41,7 +41,7 @@ const Database::DatabaseEntry Database::PadSingle = { { "Oland", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, { "Pitcairn", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, { "Tahiti", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, - { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, } }, { // ARM GPUs @@ -55,7 +55,7 @@ const Database::DatabaseEntry Database::PadSingle = { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } }, { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"PAD_DIMX",16}, {"PAD_DIMY",32}, {"PAD_WPTX",4}, {"PAD_WPTY",4} } }, { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } }, - { "default", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",4}, {"PAD_WPTY",2} } }, } }, { // Intel GPUs @@ -65,7 +65,7 @@ const Database::DatabaseEntry Database::PadSingle = { { "Intel(R) HD Graphics Skylake ULT GT2", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, { "Iris", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, { "Iris Pro", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, - { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",2} } }, } }, { // Intel accelerators @@ -88,7 +88,7 @@ const Database::DatabaseEntry Database::PadSingle = { { "GeForce GTX TITAN X", { {"PAD_DIMX",16}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, { "Tesla K20m", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, { "Tesla K40m", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, - { "default", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } }, } }, { // Default @@ -110,7 +110,7 @@ const Database::DatabaseEntry Database::PadComplexSingle = { { "Oland", { {"PAD_DIMX",8}, {"PAD_DIMY",32}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, { "Pitcairn", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, { "Tahiti", { {"PAD_DIMX",16}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, - { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, } }, { // ARM GPUs @@ -124,7 +124,7 @@ const Database::DatabaseEntry Database::PadComplexSingle = { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",2} } }, { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"PAD_DIMX",32}, {"PAD_DIMY",32}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } }, { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } }, - { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } }, } }, { // Intel GPUs @@ -134,7 +134,7 @@ const Database::DatabaseEntry Database::PadComplexSingle = { { "Intel(R) HD Graphics Skylake ULT GT2", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",4} } }, { "Iris", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",2}, {"PAD_WPTY",4} } }, { "Iris Pro", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, - { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",4} } }, } }, { // Intel accelerators @@ -157,7 +157,7 @@ const Database::DatabaseEntry Database::PadComplexSingle = { { "GeForce GTX TITAN X", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, { "Tesla K20m", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, { "Tesla K40m", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, - { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, } }, { // Default @@ -179,7 +179,7 @@ const Database::DatabaseEntry Database::PadDouble = { { "Oland", { {"PAD_DIMX",8}, {"PAD_DIMY",32}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, { "Pitcairn", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, { "Tahiti", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, - { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, } }, { // ARM GPUs @@ -193,7 +193,7 @@ const Database::DatabaseEntry Database::PadDouble = { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } }, { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"PAD_DIMX",32}, {"PAD_DIMY",32}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } }, { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, - { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } }, } }, { // Intel accelerators @@ -216,7 +216,7 @@ const Database::DatabaseEntry Database::PadDouble = { { "GeForce GTX TITAN X", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, { "Tesla K20m", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, { "Tesla K40m", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, - { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, } }, { // Default @@ -238,7 +238,7 @@ const Database::DatabaseEntry Database::PadComplexDouble = { { "Oland", { {"PAD_DIMX",8}, {"PAD_DIMY",16}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, { "Pitcairn", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, { "Tahiti", { {"PAD_DIMX",8}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, - { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, } }, { // ARM GPUs @@ -252,7 +252,7 @@ const Database::DatabaseEntry Database::PadComplexDouble = { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"PAD_DIMX",16}, {"PAD_DIMY",32}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } }, { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, - { "default", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } }, } }, { // Intel accelerators @@ -275,7 +275,7 @@ const Database::DatabaseEntry Database::PadComplexDouble = { { "GeForce GTX TITAN X", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, { "Tesla K20m", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, { "Tesla K40m", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, - { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, } }, { // Default diff --git a/src/database/kernels/padtranspose.hpp b/src/database/kernels/padtranspose.hpp index c2034c3e..0f63eafa 100644 --- a/src/database/kernels/padtranspose.hpp +++ b/src/database/kernels/padtranspose.hpp @@ -55,7 +55,7 @@ const Database::DatabaseEntry Database::PadtransposeSingle = { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",8} } }, { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"PADTRA_PAD",0}, {"PADTRA_TILE",32}, {"PADTRA_WPT",1} } }, - { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",1} } }, + { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",8} } }, } }, { // Intel GPUs @@ -88,7 +88,7 @@ const Database::DatabaseEntry Database::PadtransposeSingle = { { "GeForce GTX TITAN X", { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",1} } }, { "Tesla K20m", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, { "Tesla K40m", { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",2} } }, - { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, + { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",2} } }, } }, { // Default @@ -110,7 +110,7 @@ const Database::DatabaseEntry Database::PadtransposeComplexSingle = { { "Oland", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } }, { "Pitcairn", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } }, { "Tahiti", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, - { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",2} } }, + { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } }, } }, { // ARM GPUs @@ -134,7 +134,7 @@ const Database::DatabaseEntry Database::PadtransposeComplexSingle = { { "Intel(R) HD Graphics Skylake ULT GT2", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",4} } }, { "Iris", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, { "Iris Pro", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, - { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, + { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, } }, { // Intel accelerators @@ -157,7 +157,7 @@ const Database::DatabaseEntry Database::PadtransposeComplexSingle = { { "GeForce GTX TITAN X", { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",1} } }, { "Tesla K20m", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, { "Tesla K40m", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, - { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, + { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, } }, { // Default @@ -179,7 +179,7 @@ const Database::DatabaseEntry Database::PadtransposeDouble = { { "Oland", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",4} } }, { "Pitcairn", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } }, { "Tahiti", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, - { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",2} } }, + { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",4} } }, } }, { // ARM GPUs @@ -193,7 +193,7 @@ const Database::DatabaseEntry Database::PadtransposeDouble = { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"PADTRA_PAD",1}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } }, { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",8} } }, { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",1} } }, - { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",1} } }, + { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } }, } }, { // Intel accelerators @@ -216,7 +216,7 @@ const Database::DatabaseEntry Database::PadtransposeDouble = { { "GeForce GTX TITAN X", { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",1} } }, { "Tesla K20m", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, { "Tesla K40m", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, - { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, + { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, } }, { // Default @@ -238,7 +238,7 @@ const Database::DatabaseEntry Database::PadtransposeComplexDouble = { { "Oland", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } }, { "Pitcairn", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } }, { "Tahiti", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",2} } }, - { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",2} } }, + { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } }, } }, { // ARM GPUs @@ -252,7 +252,7 @@ const Database::DatabaseEntry Database::PadtransposeComplexDouble = { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"PADTRA_PAD",1}, {"PADTRA_TILE",8}, {"PADTRA_WPT",2} } }, { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"PADTRA_PAD",1}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } }, { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"PADTRA_PAD",1}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } }, - { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",8}, {"PADTRA_WPT",2} } }, + { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } }, } }, { // Intel accelerators @@ -275,7 +275,7 @@ const Database::DatabaseEntry Database::PadtransposeComplexDouble = { { "GeForce GTX TITAN X", { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",1} } }, { "Tesla K20m", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, { "Tesla K40m", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, - { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, + { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, } }, { // Default diff --git a/src/database/kernels/transpose.hpp b/src/database/kernels/transpose.hpp index 8e852c4b..d12d28f0 100644 --- a/src/database/kernels/transpose.hpp +++ b/src/database/kernels/transpose.hpp @@ -41,7 +41,7 @@ const Database::DatabaseEntry Database::TransposeSingle = { { "Oland", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } }, { "Pitcairn", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } }, { "Tahiti", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } }, - { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } }, + { "default", { {"TRA_DIM",4}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } }, } }, { // ARM GPUs @@ -65,7 +65,7 @@ const Database::DatabaseEntry Database::TransposeSingle = { { "Intel(R) HD Graphics Skylake ULT GT2", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } }, { "Iris", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, { "Iris Pro", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, - { "default", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, + { "default", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, } }, { // Intel accelerators @@ -88,7 +88,7 @@ const Database::DatabaseEntry Database::TransposeSingle = { { "GeForce GTX TITAN X", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, { "Tesla K20m", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, { "Tesla K40m", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, - { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, + { "default", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, } }, { // Default @@ -110,7 +110,7 @@ const Database::DatabaseEntry Database::TransposeComplexSingle = { { "Oland", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } }, { "Pitcairn", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } }, { "Tahiti", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } }, - { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } }, + { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } }, } }, { // ARM GPUs @@ -124,7 +124,7 @@ const Database::DatabaseEntry Database::TransposeComplexSingle = { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } }, { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"TRA_DIM",4}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",8} } }, { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, - { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } }, + { "default", { {"TRA_DIM",4}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",8} } }, } }, { // Intel GPUs @@ -134,7 +134,7 @@ const Database::DatabaseEntry Database::TransposeComplexSingle = { { "Intel(R) HD Graphics Skylake ULT GT2", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } }, { "Iris", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } }, { "Iris Pro", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } }, - { "default", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } }, + { "default", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } }, } }, { // NVIDIA GPUs @@ -151,7 +151,7 @@ const Database::DatabaseEntry Database::TransposeComplexSingle = { { "GeForce GTX TITAN X", { {"TRA_DIM",32}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, { "Tesla K20m", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, { "Tesla K40m", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, - { "default", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, + { "default", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, } }, { // Default @@ -173,7 +173,7 @@ const Database::DatabaseEntry Database::TransposeDouble = { { "Oland", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } }, { "Pitcairn", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } }, { "Tahiti", { {"TRA_DIM",4}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } }, - { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } }, + { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } }, } }, { // ARM GPUs @@ -187,7 +187,7 @@ const Database::DatabaseEntry Database::TransposeDouble = { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"TRA_DIM",4}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",8} } }, { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",8} } }, - { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, + { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",8} } }, } }, { // Intel accelerators @@ -210,7 +210,7 @@ const Database::DatabaseEntry Database::TransposeDouble = { { "GeForce GTX TITAN X", { {"TRA_DIM",32}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, { "Tesla K20m", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } }, { "Tesla K40m", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } }, - { "default", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, + { "default", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } }, } }, { // Default @@ -232,7 +232,7 @@ const Database::DatabaseEntry Database::TransposeComplexDouble = { { "Oland", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } }, { "Pitcairn", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } }, { "Tahiti", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } }, - { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } }, + { "default", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } }, } }, { // ARM GPUs @@ -246,7 +246,7 @@ const Database::DatabaseEntry Database::TransposeComplexDouble = { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"TRA_DIM",4}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } }, - { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } }, + { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, } }, { // NVIDIA GPUs @@ -263,7 +263,7 @@ const Database::DatabaseEntry Database::TransposeComplexDouble = { { "GeForce GTX TITAN X", { {"TRA_DIM",32}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, { "Tesla K20m", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, { "Tesla K40m", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, - { "default", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, + { "default", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, } }, { // Default diff --git a/src/database/kernels/xaxpy.hpp b/src/database/kernels/xaxpy.hpp index 905ee084..2f1dd638 100644 --- a/src/database/kernels/xaxpy.hpp +++ b/src/database/kernels/xaxpy.hpp @@ -41,7 +41,7 @@ const Database::DatabaseEntry Database::XaxpySingle = { { "Oland", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, { "Pitcairn", { {"VW",2}, {"WGS",128}, {"WPT",1} } }, { "Tahiti", { {"VW",2}, {"WGS",64}, {"WPT",1} } }, - { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, + { "default", { {"VW",2}, {"WGS",256}, {"WPT",1} } }, } }, { // ARM GPUs @@ -55,7 +55,7 @@ const Database::DatabaseEntry Database::XaxpySingle = { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"VW",1}, {"WGS",512}, {"WPT",1} } }, { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"VW",4}, {"WGS",256}, {"WPT",1} } }, { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, - { "default", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, + { "default", { {"VW",2}, {"WGS",256}, {"WPT",1} } }, } }, { // Intel GPUs @@ -88,7 +88,7 @@ const Database::DatabaseEntry Database::XaxpySingle = { { "GeForce GTX TITAN X", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, { "Tesla K20m", { {"VW",4}, {"WGS",128}, {"WPT",1} } }, { "Tesla K40m", { {"VW",4}, {"WGS",128}, {"WPT",1} } }, - { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, + { "default", { {"VW",4}, {"WGS",64}, {"WPT",1} } }, } }, { // Default @@ -110,7 +110,7 @@ const Database::DatabaseEntry Database::XaxpyComplexSingle = { { "Oland", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, { "Pitcairn", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, { "Tahiti", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, - { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, + { "default", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, } }, { // ARM GPUs @@ -124,7 +124,7 @@ const Database::DatabaseEntry Database::XaxpyComplexSingle = { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"VW",4}, {"WGS",256}, {"WPT",1} } }, { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"VW",1}, {"WGS",1024}, {"WPT",2} } }, { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"VW",2}, {"WGS",1024}, {"WPT",1} } }, - { "default", { {"VW",1}, {"WGS",256}, {"WPT",1} } }, + { "default", { {"VW",8}, {"WGS",1024}, {"WPT",1} } }, } }, { // Intel GPUs @@ -134,7 +134,7 @@ const Database::DatabaseEntry Database::XaxpyComplexSingle = { { "Intel(R) HD Graphics Skylake ULT GT2", { {"VW",2}, {"WGS",512}, {"WPT",1} } }, { "Iris", { {"VW",2}, {"WGS",128}, {"WPT",1} } }, { "Iris Pro", { {"VW",1}, {"WGS",256}, {"WPT",8} } }, - { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, + { "default", { {"VW",1}, {"WGS",128}, {"WPT",8} } }, } }, { // Intel accelerators @@ -157,7 +157,7 @@ const Database::DatabaseEntry Database::XaxpyComplexSingle = { { "GeForce GTX TITAN X", { {"VW",1}, {"WGS",512}, {"WPT",1} } }, { "Tesla K20m", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, { "Tesla K40m", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, - { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, + { "default", { {"VW",1}, {"WGS",256}, {"WPT",1} } }, } }, { // Default @@ -193,7 +193,7 @@ const Database::DatabaseEntry Database::XaxpyDouble = { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"VW",1}, {"WGS",1024}, {"WPT",1} } }, { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"VW",8}, {"WGS",64}, {"WPT",1} } }, { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"VW",8}, {"WGS",2048}, {"WPT",1} } }, - { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, + { "default", { {"VW",8}, {"WGS",512}, {"WPT",1} } }, } }, { // Intel accelerators @@ -216,7 +216,7 @@ const Database::DatabaseEntry Database::XaxpyDouble = { { "GeForce GTX TITAN X", { {"VW",1}, {"WGS",512}, {"WPT",1} } }, { "Tesla K20m", { {"VW",2}, {"WGS",128}, {"WPT",1} } }, { "Tesla K40m", { {"VW",2}, {"WGS",128}, {"WPT",1} } }, - { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, + { "default", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, } }, { // Default @@ -238,7 +238,7 @@ const Database::DatabaseEntry Database::XaxpyComplexDouble = { { "Oland", { {"VW",1}, {"WGS",256}, {"WPT",1} } }, { "Pitcairn", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, { "Tahiti", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, - { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, + { "default", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, } }, { // ARM GPUs @@ -252,7 +252,7 @@ const Database::DatabaseEntry Database::XaxpyComplexDouble = { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"VW",8}, {"WGS",128}, {"WPT",1} } }, { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"VW",8}, {"WGS",512}, {"WPT",1} } }, { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"VW",1}, {"WGS",256}, {"WPT",1} } }, - { "default", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, + { "default", { {"VW",4}, {"WGS",1024}, {"WPT",1} } }, } }, { // Intel accelerators diff --git a/src/database/kernels/xdot.hpp b/src/database/kernels/xdot.hpp index e36dd8ca..394c25de 100644 --- a/src/database/kernels/xdot.hpp +++ b/src/database/kernels/xdot.hpp @@ -41,7 +41,7 @@ const Database::DatabaseEntry Database::XdotSingle = { { "Oland", { {"WGS1",256}, {"WGS2",32} } }, { "Pitcairn", { {"WGS1",128}, {"WGS2",32} } }, { "Tahiti", { {"WGS1",128}, {"WGS2",32} } }, - { "default", { {"WGS1",128}, {"WGS2",32} } }, + { "default", { {"WGS1",256}, {"WGS2",32} } }, } }, { // Intel CPUs @@ -56,7 +56,7 @@ const Database::DatabaseEntry Database::XdotSingle = { { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"WGS1",32}, {"WGS2",32} } }, { "Intel(R) HD Graphics Skylake ULT GT2", { {"WGS1",64}, {"WGS2",32} } }, { "Iris Pro", { {"WGS1",512}, {"WGS2",64} } }, - { "default", { {"WGS1",32}, {"WGS2",32} } }, + { "default", { {"WGS1",64}, {"WGS2",32} } }, } }, { // NVIDIA GPUs @@ -70,7 +70,7 @@ const Database::DatabaseEntry Database::XdotSingle = { { "GeForce GTX 980", { {"WGS1",256}, {"WGS2",32} } }, { "GeForce GTX TITAN X", { {"WGS1",256}, {"WGS2",32} } }, { "Tesla K20m", { {"WGS1",1024}, {"WGS2",32} } }, - { "default", { {"WGS1",128}, {"WGS2",32} } }, + { "default", { {"WGS1",256}, {"WGS2",256} } }, } }, { // Default @@ -92,7 +92,7 @@ const Database::DatabaseEntry Database::XdotComplexSingle = { { "Oland", { {"WGS1",128}, {"WGS2",32} } }, { "Pitcairn", { {"WGS1",256}, {"WGS2",32} } }, { "Tahiti", { {"WGS1",64}, {"WGS2",32} } }, - { "default", { {"WGS1",64}, {"WGS2",32} } }, + { "default", { {"WGS1",256}, {"WGS2",32} } }, } }, { // Intel CPUs @@ -121,7 +121,7 @@ const Database::DatabaseEntry Database::XdotComplexSingle = { { "GeForce GTX 980", { {"WGS1",256}, {"WGS2",64} } }, { "GeForce GTX TITAN X", { {"WGS1",256}, {"WGS2",32} } }, { "Tesla K20m", { {"WGS1",512}, {"WGS2",32} } }, - { "default", { {"WGS1",64}, {"WGS2",32} } }, + { "default", { {"WGS1",512}, {"WGS2",64} } }, } }, { // Default @@ -143,7 +143,7 @@ const Database::DatabaseEntry Database::XdotDouble = { { "Oland", { {"WGS1",256}, {"WGS2",32} } }, { "Pitcairn", { {"WGS1",128}, {"WGS2",32} } }, { "Tahiti", { {"WGS1",256}, {"WGS2",32} } }, - { "default", { {"WGS1",64}, {"WGS2",32} } }, + { "default", { {"WGS1",128}, {"WGS2",32} } }, } }, { // Intel CPUs @@ -163,7 +163,7 @@ const Database::DatabaseEntry Database::XdotDouble = { { "GeForce GTX 980", { {"WGS1",128}, {"WGS2",32} } }, { "GeForce GTX TITAN X", { {"WGS1",256}, {"WGS2",32} } }, { "Tesla K20m", { {"WGS1",512}, {"WGS2",32} } }, - { "default", { {"WGS1",64}, {"WGS2",32} } }, + { "default", { {"WGS1",256}, {"WGS2",64} } }, } }, { // Default @@ -185,7 +185,7 @@ const Database::DatabaseEntry Database::XdotComplexDouble = { { "Oland", { {"WGS1",256}, {"WGS2",32} } }, { "Pitcairn", { {"WGS1",256}, {"WGS2",32} } }, { "Tahiti", { {"WGS1",256}, {"WGS2",32} } }, - { "default", { {"WGS1",64}, {"WGS2",32} } }, + { "default", { {"WGS1",256}, {"WGS2",32} } }, } }, { // Intel CPUs @@ -205,7 +205,7 @@ const Database::DatabaseEntry Database::XdotComplexDouble = { { "GeForce GTX 980", { {"WGS1",64}, {"WGS2",32} } }, { "GeForce GTX TITAN X", { {"WGS1",128}, {"WGS2",32} } }, { "Tesla K20m", { {"WGS1",128}, {"WGS2",32} } }, - { "default", { {"WGS1",64}, {"WGS2",32} } }, + { "default", { {"WGS1",128}, {"WGS2",64} } }, } }, { // Default diff --git a/src/database/kernels/xgemv.hpp b/src/database/kernels/xgemv.hpp index 6d680b06..ba71893e 100644 --- a/src/database/kernels/xgemv.hpp +++ b/src/database/kernels/xgemv.hpp @@ -48,7 +48,7 @@ const Database::DatabaseEntry Database::XgemvSingle = { kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"WGS1",64}, {"WPT1",1} } }, { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"WGS1",64}, {"WPT1",4} } }, - { "default", { {"WGS1",64}, {"WPT1",1} } }, + { "default", { {"WGS1",64}, {"WPT1",4} } }, } }, { // Intel GPUs @@ -81,7 +81,7 @@ const Database::DatabaseEntry Database::XgemvSingle = { { "GeForce GTX TITAN X", { {"WGS1",256}, {"WPT1",1} } }, { "Tesla K20m", { {"WGS1",128}, {"WPT1",1} } }, { "Tesla K40m", { {"WGS1",256}, {"WPT1",1} } }, - { "default", { {"WGS1",64}, {"WPT1",1} } }, + { "default", { {"WGS1",256}, {"WPT1",1} } }, } }, { // Default @@ -161,14 +161,14 @@ const Database::DatabaseEntry Database::XgemvDouble = { { "Oland", { {"WGS1",256}, {"WPT1",1} } }, { "Pitcairn", { {"WGS1",256}, {"WPT1",1} } }, { "Tahiti", { {"WGS1",256}, {"WPT1",1} } }, - { "default", { {"WGS1",64}, {"WPT1",1} } }, + { "default", { {"WGS1",256}, {"WPT1",1} } }, } }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"WGS1",64}, {"WPT1",2} } }, { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"WGS1",64}, {"WPT1",4} } }, - { "default", { {"WGS1",64}, {"WPT1",2} } }, + { "default", { {"WGS1",64}, {"WPT1",4} } }, } }, { // Intel accelerators @@ -191,7 +191,7 @@ const Database::DatabaseEntry Database::XgemvDouble = { { "GeForce GTX TITAN X", { {"WGS1",64}, {"WPT1",1} } }, { "Tesla K20m", { {"WGS1",256}, {"WPT1",1} } }, { "Tesla K40m", { {"WGS1",256}, {"WPT1",1} } }, - { "default", { {"WGS1",64}, {"WPT1",1} } }, + { "default", { {"WGS1",128}, {"WPT1",1} } }, } }, { // Default @@ -220,7 +220,7 @@ const Database::DatabaseEntry Database::XgemvComplexDouble = { kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"WGS1",64}, {"WPT1",1} } }, { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"WGS1",64}, {"WPT1",4} } }, - { "default", { {"WGS1",64}, {"WPT1",1} } }, + { "default", { {"WGS1",64}, {"WPT1",4} } }, } }, { // Intel accelerators @@ -234,7 +234,7 @@ const Database::DatabaseEntry Database::XgemvComplexDouble = { { "GRID K520", { {"WGS1",128}, {"WPT1",1} } }, { "GeForce GTX 480", { {"WGS1",64}, {"WPT1",1} } }, { "GeForce GTX 670", { {"WGS1",128}, {"WPT1",1} } }, - { "default", { {"WGS1",64}, {"WPT1",1} } }, + { "default", { {"WGS1",128}, {"WPT1",1} } }, } }, { // Default diff --git a/src/database/kernels/xgemv_fast.hpp b/src/database/kernels/xgemv_fast.hpp index 65b15030..7e948540 100644 --- a/src/database/kernels/xgemv_fast.hpp +++ b/src/database/kernels/xgemv_fast.hpp @@ -48,7 +48,7 @@ const Database::DatabaseEntry Database::XgemvFastSingle = { kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"VW2",4}, {"WGS2",128}, {"WPT2",4} } }, { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"VW2",1}, {"WGS2",64}, {"WPT2",4} } }, - { "default", { {"VW2",1}, {"WGS2",64}, {"WPT2",4} } }, + { "default", { {"VW2",4}, {"WGS2",64}, {"WPT2",4} } }, } }, { // Intel GPUs @@ -58,7 +58,7 @@ const Database::DatabaseEntry Database::XgemvFastSingle = { { "Intel(R) HD Graphics Skylake ULT GT2", { {"VW2",1}, {"WGS2",256}, {"WPT2",1} } }, { "Iris", { {"VW2",1}, {"WGS2",128}, {"WPT2",2} } }, { "Iris Pro", { {"VW2",1}, {"WGS2",128}, {"WPT2",2} } }, - { "default", { {"VW2",1}, {"WGS2",128}, {"WPT2",1} } }, + { "default", { {"VW2",2}, {"WGS2",64}, {"WPT2",2} } }, } }, { // Intel accelerators @@ -81,7 +81,7 @@ const Database::DatabaseEntry Database::XgemvFastSingle = { { "GeForce GTX TITAN X", { {"VW2",1}, {"WGS2",64}, {"WPT2",1} } }, { "Tesla K20m", { {"VW2",1}, {"WGS2",256}, {"WPT2",1} } }, { "Tesla K40m", { {"VW2",1}, {"WGS2",256}, {"WPT2",1} } }, - { "default", { {"VW2",1}, {"WGS2",64}, {"WPT2",1} } }, + { "default", { {"VW2",1}, {"WGS2",256}, {"WPT2",1} } }, } }, { // Default @@ -120,7 +120,7 @@ const Database::DatabaseEntry Database::XgemvFastComplexSingle = { { "Intel(R) HD Graphics Skylake ULT GT2", { {"VW2",1}, {"WGS2",64}, {"WPT2",1} } }, { "Iris", { {"VW2",1}, {"WGS2",64}, {"WPT2",1} } }, { "Iris Pro", { {"VW2",4}, {"WGS2",128}, {"WPT2",4} } }, - { "default", { {"VW2",1}, {"WGS2",64}, {"WPT2",1} } }, + { "default", { {"VW2",2}, {"WGS2",64}, {"WPT2",2} } }, } }, { // Intel accelerators @@ -189,7 +189,7 @@ const Database::DatabaseEntry Database::XgemvFastDouble = { { "GeForce GTX TITAN X", { {"VW2",1}, {"WGS2",128}, {"WPT2",1} } }, { "Tesla K20m", { {"VW2",1}, {"WGS2",128}, {"WPT2",1} } }, { "Tesla K40m", { {"VW2",1}, {"WGS2",256}, {"WPT2",1} } }, - { "default", { {"VW2",1}, {"WGS2",64}, {"WPT2",1} } }, + { "default", { {"VW2",1}, {"WGS2",256}, {"WPT2",1} } }, } }, { // Default diff --git a/src/database/kernels/xgemv_fast_rot.hpp b/src/database/kernels/xgemv_fast_rot.hpp index 9822fb20..a57dcc66 100644 --- a/src/database/kernels/xgemv_fast_rot.hpp +++ b/src/database/kernels/xgemv_fast_rot.hpp @@ -32,7 +32,7 @@ const Database::DatabaseEntry Database::XgemvFastRotSingle = { kDeviceTypeGPU, "Intel", { { "Intel(R) HD Graphics Skylake ULT GT2", { {"VW3",4}, {"WGS3",128}, {"WPT3",16} } }, { "Iris Pro", { {"VW3",4}, {"WGS3",32}, {"WPT3",16} } }, - { "default", { {"VW3",4}, {"WGS3",32}, {"WPT3",16} } }, + { "default", { {"VW3",4}, {"WGS3",128}, {"WPT3",16} } }, } }, { // NVIDIA GPUs @@ -69,7 +69,7 @@ const Database::DatabaseEntry Database::XgemvFastRotComplexSingle = { kDeviceTypeGPU, "Intel", { { "Intel(R) HD Graphics Skylake ULT GT2", { {"VW3",2}, {"WGS3",32}, {"WPT3",16} } }, { "Iris Pro", { {"VW3",4}, {"WGS3",16}, {"WPT3",16} } }, - { "default", { {"VW3",2}, {"WGS3",16}, {"WPT3",16} } }, + { "default", { {"VW3",2}, {"WGS3",32}, {"WPT3",8} } }, } }, { // Default diff --git a/src/database/kernels/xger.hpp b/src/database/kernels/xger.hpp index 216925fc..06f65c46 100644 --- a/src/database/kernels/xger.hpp +++ b/src/database/kernels/xger.hpp @@ -41,7 +41,7 @@ const Database::DatabaseEntry Database::XgerSingle = { { "Oland", { {"WGS1",32}, {"WGS2",4}, {"WPT",2} } }, { "Pitcairn", { {"WGS1",64}, {"WGS2",1}, {"WPT",1} } }, { "Tahiti", { {"WGS1",256}, {"WGS2",1}, {"WPT",1} } }, - { "default", { {"WGS1",32}, {"WGS2",1}, {"WPT",1} } }, + { "default", { {"WGS1",32}, {"WGS2",4}, {"WPT",1} } }, } }, { // ARM GPUs @@ -54,7 +54,7 @@ const Database::DatabaseEntry Database::XgerSingle = { kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"WGS1",128}, {"WGS2",2}, {"WPT",4} } }, { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"WGS1",128}, {"WGS2",1}, {"WPT",4} } }, - { "default", { {"WGS1",128}, {"WGS2",1}, {"WPT",4} } }, + { "default", { {"WGS1",512}, {"WGS2",1}, {"WPT",4} } }, } }, { // Intel GPUs @@ -63,7 +63,7 @@ const Database::DatabaseEntry Database::XgerSingle = { { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"WGS1",256}, {"WGS2",2}, {"WPT",2} } }, { "Intel(R) HD Graphics Skylake ULT GT2", { {"WGS1",8}, {"WGS2",8}, {"WPT",4} } }, { "Iris Pro", { {"WGS1",64}, {"WGS2",1}, {"WPT",4} } }, - { "default", { {"WGS1",8}, {"WGS2",1}, {"WPT",2} } }, + { "default", { {"WGS1",16}, {"WGS2",8}, {"WPT",4} } }, } }, { // NVIDIA GPUs @@ -75,7 +75,7 @@ const Database::DatabaseEntry Database::XgerSingle = { { "GeForce GTX 680", { {"WGS1",128}, {"WGS2",1}, {"WPT",4} } }, { "GeForce GTX 750", { {"WGS1",64}, {"WGS2",16}, {"WPT",4} } }, { "GeForce GTX TITAN", { {"WGS1",32}, {"WGS2",4}, {"WPT",2} } }, - { "default", { {"WGS1",32}, {"WGS2",1}, {"WPT",1} } }, + { "default", { {"WGS1",256}, {"WGS2",1}, {"WPT",4} } }, } }, { // Default @@ -97,7 +97,7 @@ const Database::DatabaseEntry Database::XgerComplexSingle = { { "Oland", { {"WGS1",4}, {"WGS2",8}, {"WPT",1} } }, { "Pitcairn", { {"WGS1",128}, {"WGS2",2}, {"WPT",1} } }, { "Tahiti", { {"WGS1",64}, {"WGS2",2}, {"WPT",1} } }, - { "default", { {"WGS1",4}, {"WGS2",1}, {"WPT",1} } }, + { "default", { {"WGS1",256}, {"WGS2",1}, {"WPT",1} } }, } }, { // ARM GPUs @@ -110,7 +110,7 @@ const Database::DatabaseEntry Database::XgerComplexSingle = { kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"WGS1",256}, {"WGS2",1}, {"WPT",4} } }, { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"WGS1",512}, {"WGS2",4}, {"WPT",2} } }, - { "default", { {"WGS1",256}, {"WGS2",1}, {"WPT",2} } }, + { "default", { {"WGS1",512}, {"WGS2",1}, {"WPT",4} } }, } }, { // Intel GPUs @@ -119,7 +119,7 @@ const Database::DatabaseEntry Database::XgerComplexSingle = { { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"WGS1",128}, {"WGS2",4}, {"WPT",1} } }, { "Intel(R) HD Graphics Skylake ULT GT2", { {"WGS1",128}, {"WGS2",4}, {"WPT",2} } }, { "Iris Pro", { {"WGS1",16}, {"WGS2",2}, {"WPT",4} } }, - { "default", { {"WGS1",16}, {"WGS2",1}, {"WPT",1} } }, + { "default", { {"WGS1",32}, {"WGS2",1}, {"WPT",2} } }, } }, { // NVIDIA GPUs @@ -131,7 +131,7 @@ const Database::DatabaseEntry Database::XgerComplexSingle = { { "GeForce GTX 680", { {"WGS1",32}, {"WGS2",4}, {"WPT",2} } }, { "GeForce GTX 750", { {"WGS1",32}, {"WGS2",16}, {"WPT",4} } }, { "GeForce GTX TITAN", { {"WGS1",16}, {"WGS2",16}, {"WPT",2} } }, - { "default", { {"WGS1",16}, {"WGS2",2}, {"WPT",2} } }, + { "default", { {"WGS1",64}, {"WGS2",2}, {"WPT",2} } }, } }, { // Default @@ -153,7 +153,7 @@ const Database::DatabaseEntry Database::XgerDouble = { { "Oland", { {"WGS1",128}, {"WGS2",1}, {"WPT",2} } }, { "Pitcairn", { {"WGS1",64}, {"WGS2",1}, {"WPT",1} } }, { "Tahiti", { {"WGS1",64}, {"WGS2",2}, {"WPT",1} } }, - { "default", { {"WGS1",32}, {"WGS2",1}, {"WPT",1} } }, + { "default", { {"WGS1",64}, {"WGS2",2}, {"WPT",1} } }, } }, { // ARM GPUs @@ -166,7 +166,7 @@ const Database::DatabaseEntry Database::XgerDouble = { kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"WGS1",512}, {"WGS2",16}, {"WPT",1} } }, { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"WGS1",512}, {"WGS2",8}, {"WPT",2} } }, - { "default", { {"WGS1",512}, {"WGS2",8}, {"WPT",1} } }, + { "default", { {"WGS1",512}, {"WGS2",1}, {"WPT",4} } }, } }, { // NVIDIA GPUs @@ -178,7 +178,7 @@ const Database::DatabaseEntry Database::XgerDouble = { { "GeForce GTX 680", { {"WGS1",128}, {"WGS2",4}, {"WPT",2} } }, { "GeForce GTX 750", { {"WGS1",256}, {"WGS2",2}, {"WPT",2} } }, { "GeForce GTX TITAN", { {"WGS1",16}, {"WGS2",8}, {"WPT",2} } }, - { "default", { {"WGS1",16}, {"WGS2",2}, {"WPT",1} } }, + { "default", { {"WGS1",256}, {"WGS2",2}, {"WPT",2} } }, } }, { // Default @@ -200,7 +200,7 @@ const Database::DatabaseEntry Database::XgerComplexDouble = { { "Oland", { {"WGS1",16}, {"WGS2",16}, {"WPT",2} } }, { "Pitcairn", { {"WGS1",64}, {"WGS2",4}, {"WPT",1} } }, { "Tahiti", { {"WGS1",32}, {"WGS2",4}, {"WPT",1} } }, - { "default", { {"WGS1",16}, {"WGS2",1}, {"WPT",1} } }, + { "default", { {"WGS1",32}, {"WGS2",4}, {"WPT",1} } }, } }, { // ARM GPUs @@ -213,7 +213,7 @@ const Database::DatabaseEntry Database::XgerComplexDouble = { kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"WGS1",512}, {"WGS2",4}, {"WPT",2} } }, { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"WGS1",256}, {"WGS2",1}, {"WPT",2} } }, - { "default", { {"WGS1",256}, {"WGS2",1}, {"WPT",2} } }, + { "default", { {"WGS1",512}, {"WGS2",1}, {"WPT",4} } }, } }, { // NVIDIA GPUs @@ -225,7 +225,7 @@ const Database::DatabaseEntry Database::XgerComplexDouble = { { "GeForce GTX 680", { {"WGS1",8}, {"WGS2",16}, {"WPT",1} } }, { "GeForce GTX 750", { {"WGS1",8}, {"WGS2",32}, {"WPT",4} } }, { "GeForce GTX TITAN", { {"WGS1",32}, {"WGS2",4}, {"WPT",2} } }, - { "default", { {"WGS1",8}, {"WGS2",2}, {"WPT",1} } }, + { "default", { {"WGS1",16}, {"WGS2",8}, {"WPT",2} } }, } }, { // Default -- cgit v1.2.3 From 00979faab4cd83a1810e9faf3e9bffe36d535763 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sun, 21 Aug 2016 20:16:06 +0200 Subject: Updated the changelog; refactored the database-get-bests code a bit --- CHANGELOG | 1 + scripts/database/database/bests.py | 23 +++++++++++++++++++++++ scripts/database/database/defaults.py | 24 ++++-------------------- 3 files changed, 28 insertions(+), 20 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 8fce1969..852b734f 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -8,6 +8,7 @@ Development version (next release) - Fixed a performance issue (caused by fp16 support) by optimizing alpha/beta parameter passing to kernels - Added an option (-warm_up) to do a warm-up run before timing in the performance clients - Improved performance significantly of rotated GEMV computations +- Improved performance of unseen/un-tuned devices by a better default tuning parameter selection - Various minor fixes and enhancements - Added tuned parameters for various devices (see README) diff --git a/scripts/database/database/bests.py b/scripts/database/database/bests.py index edb81733..e6239258 100644 --- a/scripts/database/database/bests.py +++ b/scripts/database/database/bests.py @@ -18,3 +18,26 @@ def get_best_results(df): best_parameters = database_entry[database_entry["time"] == best_time].iloc[0] database_bests = database_bests.append(best_parameters, ignore_index=True) return database_bests + + +def get_relative_bests(df, parameter_column_names, name, verbose=False): + """Retrieves the relative best execution time over different devices""" + + # Computes the sum of the execution times over the different devices + def sum_performance(x): + x["group_performance"] = x["relative_performance"].sum() + return x + df = df.groupby(parameter_column_names).apply(sum_performance) + + # Retrieves the entries with the highest performance + best_performance = df["group_performance"].max() + df_bests = df[df["group_performance"] == best_performance] + + # Retrieves one example only (the parameters are the same anyway) + df_bests = df_bests.drop_duplicates(["group_performance"]) + + # Completed, report and return the results + if verbose: + print("[database] " + str(name) + " with performance " + str(best_performance) + " " + str(df_bests.shape)) + assert len(df_bests) == 1 + return df_bests diff --git a/scripts/database/database/defaults.py b/scripts/database/database/defaults.py index 985f24bd..3428d9a9 100644 --- a/scripts/database/database/defaults.py +++ b/scripts/database/database/defaults.py @@ -97,25 +97,9 @@ def get_common_best(database, group_name, verbose): # Fall back to another method in case there are no shared entries at all across devices if len(database_common) == 0: - # print("[database] Skipping: " + str(group_name) + " with devices: %d %d " % (num_devices, len(database))) + if verbose: + print("[database] No common kernels for: " + str(group_name) + " with devices: %d " % num_devices) return get_smallest_best(database) - # Computes the sum of the execution times over the different devices - def sum_performance(x): - x["group_performance"] = x["relative_performance"].sum() - return x - database_common = database_common.groupby(parameter_column_names).apply(sum_performance) - - # Retrieves the entries with the highest performance - best_performance = database_common["group_performance"].max() - database_bests = database_common[database_common["group_performance"] == best_performance] - - # Retrieves one example only (the parameters are the same anyway) - database_bests = database_bests.drop_duplicates(["group_performance"]) - - # Completed, report and return the results - if verbose: - print("[database] " + str(group_name) + " with performance " + str(best_performance) + " with devices: " + - str(num_devices) + " " + str(database_bests.shape)) - assert len(database_bests) == 1 - return database_bests + # Retrieves the entries with the highest relative performance + return bests.get_relative_bests(database_common, parameter_column_names, group_name, verbose) -- cgit v1.2.3 From 84db8958d1238d7b171d83989d60c3605a4e2ba2 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sun, 21 Aug 2016 20:28:02 +0200 Subject: Increased the ratio of GEMM tuning results to explore; reduced the tuning search space to have a better chance to evaluate more likely parameter combinations --- src/tuning/kernels/xgemm.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/tuning/kernels/xgemm.cpp b/src/tuning/kernels/xgemm.cpp index 898b8435..eb7c8a66 100644 --- a/src/tuning/kernels/xgemm.cpp +++ b/src/tuning/kernels/xgemm.cpp @@ -48,7 +48,7 @@ class TuneXgemm { static size_t DefaultM() { return 1024; } static size_t DefaultN() { return 1024; } static size_t DefaultK() { return 1024; } - static double DefaultFraction() { return 2048.0; } + static double DefaultFraction() { return 256.0; } // Describes how to obtain the sizes of the buffers static size_t GetSizeX(const Arguments &) { return 1; } // N/A for this kernel @@ -67,9 +67,9 @@ class TuneXgemm { tuner.AddParameter(id, "NDIMC", {8, 16, 32}); tuner.AddParameter(id, "MDIMA", {8, 16, 32}); tuner.AddParameter(id, "NDIMB", {8, 16, 32}); - tuner.AddParameter(id, "KWI", {2, 8}); - tuner.AddParameter(id, "VWM", {1, 2, 4, 8}); - tuner.AddParameter(id, "VWN", {1, 2, 4, 8}); + tuner.AddParameter(id, "KWI", {2}); + tuner.AddParameter(id, "VWM", {1, 2, 4}); + tuner.AddParameter(id, "VWN", {1, 2, 4}); tuner.AddParameter(id, "STRM", {0, 1}); tuner.AddParameter(id, "STRN", {0, 1}); tuner.AddParameter(id, "SA", {0, 1}); -- cgit v1.2.3 From 0c0f0ac7f9b3b06244d5bdcde48fde4f9ffceb58 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sun, 21 Aug 2016 20:35:20 +0200 Subject: Also changed the default-default for unknown device types to use the same method as for known device groups --- scripts/database/database/defaults.py | 5 ++++- src/database/kernels/copy.hpp | 8 ++++---- src/database/kernels/pad.hpp | 8 ++++---- src/database/kernels/padtranspose.hpp | 8 ++++---- src/database/kernels/transpose.hpp | 8 ++++---- src/database/kernels/xaxpy.hpp | 6 +++--- src/database/kernels/xdot.hpp | 8 ++++---- src/database/kernels/xgemv.hpp | 2 +- src/database/kernels/xgemv_fast_rot.hpp | 8 ++++---- 9 files changed, 32 insertions(+), 29 deletions(-) diff --git a/scripts/database/database/defaults.py b/scripts/database/database/defaults.py index 3428d9a9..8a02c201 100644 --- a/scripts/database/database/defaults.py +++ b/scripts/database/database/defaults.py @@ -51,7 +51,10 @@ def calculate_defaults(database, verbose, calculate_common_best=True): # Defaults over all device types and vendors groups = database.groupby(clblast.KERNEL_ATTRIBUTES + ["kernel"] + clblast.ARGUMENT_ATTRIBUTES) for group_name, database_group in groups: - default_values = get_smallest_best(database_group) + if calculate_common_best: + default_values = get_common_best(database_group, group_name, verbose) + else: + default_values = get_smallest_best(database_group) default_values["device_vendor"] = clblast.VENDOR_DEFAULT default_values["device_type"] = clblast.DEVICE_TYPE_DEFAULT default_values = set_default_device(default_values) diff --git a/src/database/kernels/copy.hpp b/src/database/kernels/copy.hpp index e9902293..0ea2b495 100644 --- a/src/database/kernels/copy.hpp +++ b/src/database/kernels/copy.hpp @@ -93,7 +93,7 @@ const Database::DatabaseEntry Database::CopySingle = { }, { // Default kDeviceTypeAll, "default", { - { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, + { "default", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",2} } }, } }, } @@ -154,7 +154,7 @@ const Database::DatabaseEntry Database::CopyComplexSingle = { }, { // Default kDeviceTypeAll, "default", { - { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, + { "default", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, } }, } @@ -213,7 +213,7 @@ const Database::DatabaseEntry Database::CopyDouble = { }, { // Default kDeviceTypeAll, "default", { - { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, + { "default", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } }, } }, } @@ -272,7 +272,7 @@ const Database::DatabaseEntry Database::CopyComplexDouble = { }, { // Default kDeviceTypeAll, "default", { - { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, + { "default", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, } }, } diff --git a/src/database/kernels/pad.hpp b/src/database/kernels/pad.hpp index a242a827..d1643447 100644 --- a/src/database/kernels/pad.hpp +++ b/src/database/kernels/pad.hpp @@ -93,7 +93,7 @@ const Database::DatabaseEntry Database::PadSingle = { }, { // Default kDeviceTypeAll, "default", { - { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, } }, } @@ -162,7 +162,7 @@ const Database::DatabaseEntry Database::PadComplexSingle = { }, { // Default kDeviceTypeAll, "default", { - { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, } }, } @@ -221,7 +221,7 @@ const Database::DatabaseEntry Database::PadDouble = { }, { // Default kDeviceTypeAll, "default", { - { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, } }, } @@ -280,7 +280,7 @@ const Database::DatabaseEntry Database::PadComplexDouble = { }, { // Default kDeviceTypeAll, "default", { - { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, } }, } diff --git a/src/database/kernels/padtranspose.hpp b/src/database/kernels/padtranspose.hpp index 0f63eafa..bee66a4d 100644 --- a/src/database/kernels/padtranspose.hpp +++ b/src/database/kernels/padtranspose.hpp @@ -93,7 +93,7 @@ const Database::DatabaseEntry Database::PadtransposeSingle = { }, { // Default kDeviceTypeAll, "default", { - { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",1} } }, + { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, } }, } @@ -162,7 +162,7 @@ const Database::DatabaseEntry Database::PadtransposeComplexSingle = { }, { // Default kDeviceTypeAll, "default", { - { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",1} } }, + { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, } }, } @@ -221,7 +221,7 @@ const Database::DatabaseEntry Database::PadtransposeDouble = { }, { // Default kDeviceTypeAll, "default", { - { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",1} } }, + { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, } }, } @@ -280,7 +280,7 @@ const Database::DatabaseEntry Database::PadtransposeComplexDouble = { }, { // Default kDeviceTypeAll, "default", { - { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",1} } }, + { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",2} } }, } }, } diff --git a/src/database/kernels/transpose.hpp b/src/database/kernels/transpose.hpp index d12d28f0..bf8ac665 100644 --- a/src/database/kernels/transpose.hpp +++ b/src/database/kernels/transpose.hpp @@ -93,7 +93,7 @@ const Database::DatabaseEntry Database::TransposeSingle = { }, { // Default kDeviceTypeAll, "default", { - { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, + { "default", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } }, } }, } @@ -156,7 +156,7 @@ const Database::DatabaseEntry Database::TransposeComplexSingle = { }, { // Default kDeviceTypeAll, "default", { - { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, + { "default", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, } }, } @@ -215,7 +215,7 @@ const Database::DatabaseEntry Database::TransposeDouble = { }, { // Default kDeviceTypeAll, "default", { - { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, + { "default", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } }, } }, } @@ -268,7 +268,7 @@ const Database::DatabaseEntry Database::TransposeComplexDouble = { }, { // Default kDeviceTypeAll, "default", { - { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, + { "default", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } }, } }, } diff --git a/src/database/kernels/xaxpy.hpp b/src/database/kernels/xaxpy.hpp index 2f1dd638..78ff3288 100644 --- a/src/database/kernels/xaxpy.hpp +++ b/src/database/kernels/xaxpy.hpp @@ -93,7 +93,7 @@ const Database::DatabaseEntry Database::XaxpySingle = { }, { // Default kDeviceTypeAll, "default", { - { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, + { "default", { {"VW",2}, {"WGS",128}, {"WPT",1} } }, } }, } @@ -162,7 +162,7 @@ const Database::DatabaseEntry Database::XaxpyComplexSingle = { }, { // Default kDeviceTypeAll, "default", { - { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, + { "default", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, } }, } @@ -280,7 +280,7 @@ const Database::DatabaseEntry Database::XaxpyComplexDouble = { }, { // Default kDeviceTypeAll, "default", { - { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, + { "default", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, } }, } diff --git a/src/database/kernels/xdot.hpp b/src/database/kernels/xdot.hpp index 394c25de..51ab4099 100644 --- a/src/database/kernels/xdot.hpp +++ b/src/database/kernels/xdot.hpp @@ -75,7 +75,7 @@ const Database::DatabaseEntry Database::XdotSingle = { }, { // Default kDeviceTypeAll, "default", { - { "default", { {"WGS1",32}, {"WGS2",32} } }, + { "default", { {"WGS1",256}, {"WGS2",32} } }, } }, } @@ -126,7 +126,7 @@ const Database::DatabaseEntry Database::XdotComplexSingle = { }, { // Default kDeviceTypeAll, "default", { - { "default", { {"WGS1",32}, {"WGS2",32} } }, + { "default", { {"WGS1",256}, {"WGS2",32} } }, } }, } @@ -168,7 +168,7 @@ const Database::DatabaseEntry Database::XdotDouble = { }, { // Default kDeviceTypeAll, "default", { - { "default", { {"WGS1",64}, {"WGS2",32} } }, + { "default", { {"WGS1",128}, {"WGS2",64} } }, } }, } @@ -210,7 +210,7 @@ const Database::DatabaseEntry Database::XdotComplexDouble = { }, { // Default kDeviceTypeAll, "default", { - { "default", { {"WGS1",64}, {"WGS2",32} } }, + { "default", { {"WGS1",256}, {"WGS2",64} } }, } }, } diff --git a/src/database/kernels/xgemv.hpp b/src/database/kernels/xgemv.hpp index ba71893e..03b25bdc 100644 --- a/src/database/kernels/xgemv.hpp +++ b/src/database/kernels/xgemv.hpp @@ -196,7 +196,7 @@ const Database::DatabaseEntry Database::XgemvDouble = { }, { // Default kDeviceTypeAll, "default", { - { "default", { {"WGS1",64}, {"WPT1",1} } }, + { "default", { {"WGS1",128}, {"WPT1",1} } }, } }, } diff --git a/src/database/kernels/xgemv_fast_rot.hpp b/src/database/kernels/xgemv_fast_rot.hpp index a57dcc66..42d3c5d1 100644 --- a/src/database/kernels/xgemv_fast_rot.hpp +++ b/src/database/kernels/xgemv_fast_rot.hpp @@ -43,7 +43,7 @@ const Database::DatabaseEntry Database::XgemvFastRotSingle = { }, { // Default kDeviceTypeAll, "default", { - { "default", { {"VW3",1}, {"WGS3",16}, {"WPT3",8} } }, + { "default", { {"VW3",8}, {"WGS3",32}, {"WPT3",32} } }, } }, } @@ -74,7 +74,7 @@ const Database::DatabaseEntry Database::XgemvFastRotComplexSingle = { }, { // Default kDeviceTypeAll, "default", { - { "default", { {"VW3",2}, {"WGS3",16}, {"WPT3",16} } }, + { "default", { {"VW3",4}, {"WGS3",16}, {"WPT3",16} } }, } }, } @@ -104,7 +104,7 @@ const Database::DatabaseEntry Database::XgemvFastRotDouble = { }, { // Default kDeviceTypeAll, "default", { - { "default", { {"VW3",1}, {"WGS3",16}, {"WPT3",8} } }, + { "default", { {"VW3",4}, {"WGS3",16}, {"WPT3",16} } }, } }, } @@ -128,7 +128,7 @@ const Database::DatabaseEntry Database::XgemvFastRotComplexDouble = { }, { // Default kDeviceTypeAll, "default", { - { "default", { {"VW3",4}, {"WGS3",16}, {"WPT3",16} } }, + { "default", { {"VW3",8}, {"WGS3",32}, {"WPT3",16} } }, } }, } -- cgit v1.2.3