summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCedric Nugteren <web@cedricnugteren.nl>2016-08-15 21:01:07 +0200
committerCedric Nugteren <web@cedricnugteren.nl>2016-08-15 21:01:07 +0200
commit7d5631b7e4bb011725b512b55fefa1fa8165a8dd (patch)
tree578b0b2356e9d6e22066da7d439fb0828f4dcb18
parent7da6492b36cae7ba8859cd6d6ab3250e11f9a2b8 (diff)
Updated the database script to calculate the relative best performance of tuning results common for a device/vendor type
-rwxr-xr-xscripts/database/database.py2
-rw-r--r--scripts/database/database/defaults.py39
-rw-r--r--src/database/kernels/copy.hpp22
-rw-r--r--src/database/kernels/pad.hpp28
-rw-r--r--src/database/kernels/padtranspose.hpp22
-rw-r--r--src/database/kernels/transpose.hpp26
-rw-r--r--src/database/kernels/xaxpy.hpp22
-rw-r--r--src/database/kernels/xdot.hpp18
-rw-r--r--src/database/kernels/xgemv.hpp14
-rw-r--r--src/database/kernels/xgemv_fast.hpp10
-rw-r--r--src/database/kernels/xgemv_fast_rot.hpp4
-rw-r--r--src/database/kernels/xger.hpp28
12 files changed, 123 insertions, 112 deletions
diff --git a/scripts/database/database.py b/scripts/database/database.py
index 0ce89d03..6d370d99 100755
--- a/scripts/database/database.py
+++ b/scripts/database/database.py
@@ -99,7 +99,7 @@ def main(argv):
# Determines the defaults for other vendors and per vendor
print("[database] Calculating the default values...")
- database_defaults = defaults.calculate_defaults(database)
+ database_defaults = defaults.calculate_defaults(database, cl_args.verbose)
database_best_results = db.concatenate_database(database_best_results, database_defaults)
# Outputs the database as a C++ database
diff --git a/scripts/database/database/defaults.py b/scripts/database/database/defaults.py
index 48693247..985f24bd 100644
--- a/scripts/database/database/defaults.py
+++ b/scripts/database/database/defaults.py
@@ -25,7 +25,7 @@ def set_default_time(database_entry):
return database_entry
-def calculate_defaults(database, calculate_common_best=True):
+def calculate_defaults(database, verbose, calculate_common_best=True):
"""Sets defaults for devices of the same type/vendor. An option determines how to compute the defaults."""
database_defaults = pd.DataFrame()
@@ -34,7 +34,7 @@ def calculate_defaults(database, calculate_common_best=True):
clblast.ARGUMENT_ATTRIBUTES)
for group_name, database_group in database_type_vendor:
if calculate_common_best:
- default_values = get_common_best(database_group, group_name)
+ default_values = get_common_best(database_group, group_name, verbose)
else:
default_values = get_smallest_best(database_group)
default_values = set_default_device(default_values)
@@ -69,16 +69,23 @@ def get_smallest_best(database):
return database_best_results.min(axis=0)
-def get_common_best(database, group_name):
+def get_common_best(database, group_name, verbose):
"""Sets defaults based on the best values of entries supported by all devices. This might cause a problem in case
- not every device was tuned with the same parameters."""
- # TODO: Quite a bit slower than the above `get_smallest_best` method
+ not every device was tuned with the same parameters. In that case it falls back to the above method to retrieve
+ the smallest best execution time"""
# Counts the number of devices in this group
num_devices = len(database.groupby(clblast.DEVICE_ATTRIBUTES))
# Removes columns without any values
database = database.dropna(axis=1, how='all')
+ database = database.reset_index()
+
+ # Inserts the relative execution times into the database
+ def relative_performance(x):
+ x["relative_performance"] = x["time"].min() / x["time"]
+ return x
+ database = database.groupby(clblast.ATTRIBUTES + ["kernel"]).apply(relative_performance)
# Retrieves the parameter names for this kernel
all_column_names = list(database.columns.values)
@@ -94,17 +101,21 @@ def get_common_best(database, group_name):
return get_smallest_best(database)
# Computes the sum of the execution times over the different devices
- database_common_by_parameters = database_common.groupby(parameter_column_names)
- group_times = database_common_by_parameters['time'].transform(sum)
- database_common.loc[:, 'group_time'] = group_times
+ def sum_performance(x):
+ x["group_performance"] = x["relative_performance"].sum()
+ return x
+ database_common = database_common.groupby(parameter_column_names).apply(sum_performance)
- # Retrieves the entries with the best execution time
- best_time = database_common["group_time"].min()
- database_bests = database_common[database_common["group_time"] == best_time]
+ # Retrieves the entries with the highest performance
+ best_performance = database_common["group_performance"].max()
+ database_bests = database_common[database_common["group_performance"] == best_performance]
# Retrieves one example only (the parameters are the same anyway)
- database_bests = database_bests.drop_duplicates(["group_time"])
- # print("[database] " + str(group_name) + " with devices: " + str(num_devices) + " " + str(database_bests.shape))
- assert len(database_bests) == 1
+ database_bests = database_bests.drop_duplicates(["group_performance"])
+ # Completed, report and return the results
+ if verbose:
+ print("[database] " + str(group_name) + " with performance " + str(best_performance) + " with devices: " +
+ str(num_devices) + " " + str(database_bests.shape))
+ assert len(database_bests) == 1
return database_bests
diff --git a/src/database/kernels/copy.hpp b/src/database/kernels/copy.hpp
index d592f110..e9902293 100644
--- a/src/database/kernels/copy.hpp
+++ b/src/database/kernels/copy.hpp
@@ -41,7 +41,7 @@ const Database::DatabaseEntry Database::CopySingle = {
{ "Oland", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",2} } },
{ "Pitcairn", { {"COPY_DIMX",8}, {"COPY_DIMY",16}, {"COPY_VW",4}, {"COPY_WPT",1} } },
{ "Tahiti", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",2} } },
- { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } },
+ { "default", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",2} } },
}
},
{ // ARM GPUs
@@ -65,7 +65,7 @@ const Database::DatabaseEntry Database::CopySingle = {
{ "Intel(R) HD Graphics Skylake ULT GT2", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } },
{ "Iris", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } },
{ "Iris Pro", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",4} } },
- { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
+ { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } },
}
},
{ // Intel accelerators
@@ -88,7 +88,7 @@ const Database::DatabaseEntry Database::CopySingle = {
{ "GeForce GTX TITAN X", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } },
{ "Tesla K20m", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",4} } },
{ "Tesla K40m", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",2} } },
- { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
+ { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",16}, {"COPY_VW",4}, {"COPY_WPT",1} } },
}
},
{ // Default
@@ -110,7 +110,7 @@ const Database::DatabaseEntry Database::CopyComplexSingle = {
{ "Oland", { {"COPY_DIMX",8}, {"COPY_DIMY",16}, {"COPY_VW",1}, {"COPY_WPT",1} } },
{ "Pitcairn", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } },
{ "Tahiti", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",2} } },
- { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
+ { "default", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
}
},
{ // Intel CPUs
@@ -118,7 +118,7 @@ const Database::DatabaseEntry Database::CopyComplexSingle = {
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"COPY_DIMX",16}, {"COPY_DIMY",16}, {"COPY_VW",8}, {"COPY_WPT",1} } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",2} } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",8}, {"COPY_WPT",1} } },
- { "default", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } },
+ { "default", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",8}, {"COPY_WPT",1} } },
}
},
{ // Intel GPUs
@@ -149,7 +149,7 @@ const Database::DatabaseEntry Database::CopyComplexSingle = {
{ "GeForce GTX TITAN X", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
{ "Tesla K20m", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",4} } },
{ "Tesla K40m", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
- { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
+ { "default", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
}
},
{ // Default
@@ -171,7 +171,7 @@ const Database::DatabaseEntry Database::CopyDouble = {
{ "Oland", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",8} } },
{ "Pitcairn", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
{ "Tahiti", { {"COPY_DIMX",8}, {"COPY_DIMY",32}, {"COPY_VW",2}, {"COPY_WPT",1} } },
- { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
+ { "default", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } },
}
},
{ // ARM GPUs
@@ -185,7 +185,7 @@ const Database::DatabaseEntry Database::CopyDouble = {
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",8}, {"COPY_WPT",1} } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"COPY_DIMX",16}, {"COPY_DIMY",32}, {"COPY_VW",2}, {"COPY_WPT",1} } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"COPY_DIMX",16}, {"COPY_DIMY",16}, {"COPY_VW",8}, {"COPY_WPT",1} } },
- { "default", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } },
+ { "default", { {"COPY_DIMX",16}, {"COPY_DIMY",16}, {"COPY_VW",8}, {"COPY_WPT",1} } },
}
},
{ // Intel accelerators
@@ -208,7 +208,7 @@ const Database::DatabaseEntry Database::CopyDouble = {
{ "GeForce GTX TITAN X", { {"COPY_DIMX",32}, {"COPY_DIMY",16}, {"COPY_VW",1}, {"COPY_WPT",1} } },
{ "Tesla K20m", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } },
{ "Tesla K40m", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",2} } },
- { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
+ { "default", { {"COPY_DIMX",32}, {"COPY_DIMY",32}, {"COPY_VW",2}, {"COPY_WPT",1} } },
}
},
{ // Default
@@ -230,7 +230,7 @@ const Database::DatabaseEntry Database::CopyComplexDouble = {
{ "Oland", { {"COPY_DIMX",8}, {"COPY_DIMY",16}, {"COPY_VW",1}, {"COPY_WPT",1} } },
{ "Pitcairn", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
{ "Tahiti", { {"COPY_DIMX",8}, {"COPY_DIMY",16}, {"COPY_VW",1}, {"COPY_WPT",1} } },
- { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
+ { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",16}, {"COPY_VW",1}, {"COPY_WPT",1} } },
}
},
{ // ARM GPUs
@@ -244,7 +244,7 @@ const Database::DatabaseEntry Database::CopyComplexDouble = {
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",8}, {"COPY_WPT",1} } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"COPY_DIMX",32}, {"COPY_DIMY",32}, {"COPY_VW",8}, {"COPY_WPT",1} } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",8}, {"COPY_WPT",1} } },
- { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",8}, {"COPY_WPT",1} } },
+ { "default", { {"COPY_DIMX",32}, {"COPY_DIMY",32}, {"COPY_VW",8}, {"COPY_WPT",1} } },
}
},
{ // Intel accelerators
diff --git a/src/database/kernels/pad.hpp b/src/database/kernels/pad.hpp
index cd034f15..a242a827 100644
--- a/src/database/kernels/pad.hpp
+++ b/src/database/kernels/pad.hpp
@@ -41,7 +41,7 @@ const Database::DatabaseEntry Database::PadSingle = {
{ "Oland", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } },
{ "Pitcairn", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } },
{ "Tahiti", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } },
- { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
+ { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } },
}
},
{ // ARM GPUs
@@ -55,7 +55,7 @@ const Database::DatabaseEntry Database::PadSingle = {
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"PAD_DIMX",16}, {"PAD_DIMY",32}, {"PAD_WPTX",4}, {"PAD_WPTY",4} } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } },
- { "default", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } },
+ { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",4}, {"PAD_WPTY",2} } },
}
},
{ // Intel GPUs
@@ -65,7 +65,7 @@ const Database::DatabaseEntry Database::PadSingle = {
{ "Intel(R) HD Graphics Skylake ULT GT2", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
{ "Iris", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
{ "Iris Pro", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
- { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
+ { "default", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",2} } },
}
},
{ // Intel accelerators
@@ -88,7 +88,7 @@ const Database::DatabaseEntry Database::PadSingle = {
{ "GeForce GTX TITAN X", { {"PAD_DIMX",16}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
{ "Tesla K20m", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
{ "Tesla K40m", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
- { "default", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
+ { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } },
}
},
{ // Default
@@ -110,7 +110,7 @@ const Database::DatabaseEntry Database::PadComplexSingle = {
{ "Oland", { {"PAD_DIMX",8}, {"PAD_DIMY",32}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
{ "Pitcairn", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } },
{ "Tahiti", { {"PAD_DIMX",16}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
- { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
+ { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
}
},
{ // ARM GPUs
@@ -124,7 +124,7 @@ const Database::DatabaseEntry Database::PadComplexSingle = {
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",2} } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"PAD_DIMX",32}, {"PAD_DIMY",32}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } },
- { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
+ { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } },
}
},
{ // Intel GPUs
@@ -134,7 +134,7 @@ const Database::DatabaseEntry Database::PadComplexSingle = {
{ "Intel(R) HD Graphics Skylake ULT GT2", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",4} } },
{ "Iris", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",2}, {"PAD_WPTY",4} } },
{ "Iris Pro", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
- { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
+ { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",4} } },
}
},
{ // Intel accelerators
@@ -157,7 +157,7 @@ const Database::DatabaseEntry Database::PadComplexSingle = {
{ "GeForce GTX TITAN X", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
{ "Tesla K20m", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } },
{ "Tesla K40m", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
- { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
+ { "default", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
}
},
{ // Default
@@ -179,7 +179,7 @@ const Database::DatabaseEntry Database::PadDouble = {
{ "Oland", { {"PAD_DIMX",8}, {"PAD_DIMY",32}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
{ "Pitcairn", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } },
{ "Tahiti", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
- { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
+ { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } },
}
},
{ // ARM GPUs
@@ -193,7 +193,7 @@ const Database::DatabaseEntry Database::PadDouble = {
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"PAD_DIMX",32}, {"PAD_DIMY",32}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
- { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
+ { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } },
}
},
{ // Intel accelerators
@@ -216,7 +216,7 @@ const Database::DatabaseEntry Database::PadDouble = {
{ "GeForce GTX TITAN X", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
{ "Tesla K20m", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
{ "Tesla K40m", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } },
- { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
+ { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
}
},
{ // Default
@@ -238,7 +238,7 @@ const Database::DatabaseEntry Database::PadComplexDouble = {
{ "Oland", { {"PAD_DIMX",8}, {"PAD_DIMY",16}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
{ "Pitcairn", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
{ "Tahiti", { {"PAD_DIMX",8}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
- { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
+ { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
}
},
{ // ARM GPUs
@@ -252,7 +252,7 @@ const Database::DatabaseEntry Database::PadComplexDouble = {
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"PAD_DIMX",16}, {"PAD_DIMY",32}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
- { "default", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
+ { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } },
}
},
{ // Intel accelerators
@@ -275,7 +275,7 @@ const Database::DatabaseEntry Database::PadComplexDouble = {
{ "GeForce GTX TITAN X", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
{ "Tesla K20m", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } },
{ "Tesla K40m", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
- { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
+ { "default", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
}
},
{ // Default
diff --git a/src/database/kernels/padtranspose.hpp b/src/database/kernels/padtranspose.hpp
index c2034c3e..0f63eafa 100644
--- a/src/database/kernels/padtranspose.hpp
+++ b/src/database/kernels/padtranspose.hpp
@@ -55,7 +55,7 @@ const Database::DatabaseEntry Database::PadtransposeSingle = {
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",8} } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"PADTRA_PAD",0}, {"PADTRA_TILE",32}, {"PADTRA_WPT",1} } },
- { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",1} } },
+ { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",8} } },
}
},
{ // Intel GPUs
@@ -88,7 +88,7 @@ const Database::DatabaseEntry Database::PadtransposeSingle = {
{ "GeForce GTX TITAN X", { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",1} } },
{ "Tesla K20m", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } },
{ "Tesla K40m", { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",2} } },
- { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
+ { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",2} } },
}
},
{ // Default
@@ -110,7 +110,7 @@ const Database::DatabaseEntry Database::PadtransposeComplexSingle = {
{ "Oland", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } },
{ "Pitcairn", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } },
{ "Tahiti", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } },
- { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",2} } },
+ { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } },
}
},
{ // ARM GPUs
@@ -134,7 +134,7 @@ const Database::DatabaseEntry Database::PadtransposeComplexSingle = {
{ "Intel(R) HD Graphics Skylake ULT GT2", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",4} } },
{ "Iris", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } },
{ "Iris Pro", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } },
- { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } },
+ { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } },
}
},
{ // Intel accelerators
@@ -157,7 +157,7 @@ const Database::DatabaseEntry Database::PadtransposeComplexSingle = {
{ "GeForce GTX TITAN X", { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",1} } },
{ "Tesla K20m", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
{ "Tesla K40m", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
- { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
+ { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
}
},
{ // Default
@@ -179,7 +179,7 @@ const Database::DatabaseEntry Database::PadtransposeDouble = {
{ "Oland", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",4} } },
{ "Pitcairn", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } },
{ "Tahiti", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } },
- { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",2} } },
+ { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",4} } },
}
},
{ // ARM GPUs
@@ -193,7 +193,7 @@ const Database::DatabaseEntry Database::PadtransposeDouble = {
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"PADTRA_PAD",1}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",8} } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",1} } },
- { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",1} } },
+ { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } },
}
},
{ // Intel accelerators
@@ -216,7 +216,7 @@ const Database::DatabaseEntry Database::PadtransposeDouble = {
{ "GeForce GTX TITAN X", { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",1} } },
{ "Tesla K20m", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
{ "Tesla K40m", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
- { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
+ { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
}
},
{ // Default
@@ -238,7 +238,7 @@ const Database::DatabaseEntry Database::PadtransposeComplexDouble = {
{ "Oland", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } },
{ "Pitcairn", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } },
{ "Tahiti", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",2} } },
- { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",2} } },
+ { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } },
}
},
{ // ARM GPUs
@@ -252,7 +252,7 @@ const Database::DatabaseEntry Database::PadtransposeComplexDouble = {
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"PADTRA_PAD",1}, {"PADTRA_TILE",8}, {"PADTRA_WPT",2} } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"PADTRA_PAD",1}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"PADTRA_PAD",1}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } },
- { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",8}, {"PADTRA_WPT",2} } },
+ { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } },
}
},
{ // Intel accelerators
@@ -275,7 +275,7 @@ const Database::DatabaseEntry Database::PadtransposeComplexDouble = {
{ "GeForce GTX TITAN X", { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",1} } },
{ "Tesla K20m", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
{ "Tesla K40m", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
- { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
+ { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
}
},
{ // Default
diff --git a/src/database/kernels/transpose.hpp b/src/database/kernels/transpose.hpp
index 8e852c4b..d12d28f0 100644
--- a/src/database/kernels/transpose.hpp
+++ b/src/database/kernels/transpose.hpp
@@ -41,7 +41,7 @@ const Database::DatabaseEntry Database::TransposeSingle = {
{ "Oland", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } },
{ "Pitcairn", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } },
{ "Tahiti", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } },
- { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } },
+ { "default", { {"TRA_DIM",4}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } },
}
},
{ // ARM GPUs
@@ -65,7 +65,7 @@ const Database::DatabaseEntry Database::TransposeSingle = {
{ "Intel(R) HD Graphics Skylake ULT GT2", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } },
{ "Iris", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } },
{ "Iris Pro", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } },
- { "default", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } },
+ { "default", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } },
}
},
{ // Intel accelerators
@@ -88,7 +88,7 @@ const Database::DatabaseEntry Database::TransposeSingle = {
{ "GeForce GTX TITAN X", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } },
{ "Tesla K20m", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } },
{ "Tesla K40m", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } },
- { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
+ { "default", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } },
}
},
{ // Default
@@ -110,7 +110,7 @@ const Database::DatabaseEntry Database::TransposeComplexSingle = {
{ "Oland", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } },
{ "Pitcairn", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } },
{ "Tahiti", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } },
- { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } },
+ { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } },
}
},
{ // ARM GPUs
@@ -124,7 +124,7 @@ const Database::DatabaseEntry Database::TransposeComplexSingle = {
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"TRA_DIM",4}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",8} } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } },
- { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } },
+ { "default", { {"TRA_DIM",4}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",8} } },
}
},
{ // Intel GPUs
@@ -134,7 +134,7 @@ const Database::DatabaseEntry Database::TransposeComplexSingle = {
{ "Intel(R) HD Graphics Skylake ULT GT2", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } },
{ "Iris", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } },
{ "Iris Pro", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } },
- { "default", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } },
+ { "default", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } },
}
},
{ // NVIDIA GPUs
@@ -151,7 +151,7 @@ const Database::DatabaseEntry Database::TransposeComplexSingle = {
{ "GeForce GTX TITAN X", { {"TRA_DIM",32}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
{ "Tesla K20m", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
{ "Tesla K40m", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
- { "default", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
+ { "default", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
}
},
{ // Default
@@ -173,7 +173,7 @@ const Database::DatabaseEntry Database::TransposeDouble = {
{ "Oland", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } },
{ "Pitcairn", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } },
{ "Tahiti", { {"TRA_DIM",4}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } },
- { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } },
+ { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } },
}
},
{ // ARM GPUs
@@ -187,7 +187,7 @@ const Database::DatabaseEntry Database::TransposeDouble = {
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"TRA_DIM",4}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",8} } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",8} } },
- { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } },
+ { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",8} } },
}
},
{ // Intel accelerators
@@ -210,7 +210,7 @@ const Database::DatabaseEntry Database::TransposeDouble = {
{ "GeForce GTX TITAN X", { {"TRA_DIM",32}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
{ "Tesla K20m", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } },
{ "Tesla K40m", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } },
- { "default", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
+ { "default", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } },
}
},
{ // Default
@@ -232,7 +232,7 @@ const Database::DatabaseEntry Database::TransposeComplexDouble = {
{ "Oland", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } },
{ "Pitcairn", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } },
{ "Tahiti", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } },
- { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } },
+ { "default", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } },
}
},
{ // ARM GPUs
@@ -246,7 +246,7 @@ const Database::DatabaseEntry Database::TransposeComplexDouble = {
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"TRA_DIM",4}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } },
- { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } },
+ { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } },
}
},
{ // NVIDIA GPUs
@@ -263,7 +263,7 @@ const Database::DatabaseEntry Database::TransposeComplexDouble = {
{ "GeForce GTX TITAN X", { {"TRA_DIM",32}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
{ "Tesla K20m", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
{ "Tesla K40m", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
- { "default", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
+ { "default", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
}
},
{ // Default
diff --git a/src/database/kernels/xaxpy.hpp b/src/database/kernels/xaxpy.hpp
index 905ee084..2f1dd638 100644
--- a/src/database/kernels/xaxpy.hpp
+++ b/src/database/kernels/xaxpy.hpp
@@ -41,7 +41,7 @@ const Database::DatabaseEntry Database::XaxpySingle = {
{ "Oland", { {"VW",1}, {"WGS",128}, {"WPT",1} } },
{ "Pitcairn", { {"VW",2}, {"WGS",128}, {"WPT",1} } },
{ "Tahiti", { {"VW",2}, {"WGS",64}, {"WPT",1} } },
- { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } },
+ { "default", { {"VW",2}, {"WGS",256}, {"WPT",1} } },
}
},
{ // ARM GPUs
@@ -55,7 +55,7 @@ const Database::DatabaseEntry Database::XaxpySingle = {
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"VW",1}, {"WGS",512}, {"WPT",1} } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"VW",4}, {"WGS",256}, {"WPT",1} } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"VW",1}, {"WGS",128}, {"WPT",1} } },
- { "default", { {"VW",1}, {"WGS",128}, {"WPT",1} } },
+ { "default", { {"VW",2}, {"WGS",256}, {"WPT",1} } },
}
},
{ // Intel GPUs
@@ -88,7 +88,7 @@ const Database::DatabaseEntry Database::XaxpySingle = {
{ "GeForce GTX TITAN X", { {"VW",1}, {"WGS",64}, {"WPT",1} } },
{ "Tesla K20m", { {"VW",4}, {"WGS",128}, {"WPT",1} } },
{ "Tesla K40m", { {"VW",4}, {"WGS",128}, {"WPT",1} } },
- { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } },
+ { "default", { {"VW",4}, {"WGS",64}, {"WPT",1} } },
}
},
{ // Default
@@ -110,7 +110,7 @@ const Database::DatabaseEntry Database::XaxpyComplexSingle = {
{ "Oland", { {"VW",1}, {"WGS",128}, {"WPT",1} } },
{ "Pitcairn", { {"VW",1}, {"WGS",64}, {"WPT",1} } },
{ "Tahiti", { {"VW",1}, {"WGS",64}, {"WPT",1} } },
- { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } },
+ { "default", { {"VW",1}, {"WGS",128}, {"WPT",1} } },
}
},
{ // ARM GPUs
@@ -124,7 +124,7 @@ const Database::DatabaseEntry Database::XaxpyComplexSingle = {
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"VW",4}, {"WGS",256}, {"WPT",1} } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"VW",1}, {"WGS",1024}, {"WPT",2} } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"VW",2}, {"WGS",1024}, {"WPT",1} } },
- { "default", { {"VW",1}, {"WGS",256}, {"WPT",1} } },
+ { "default", { {"VW",8}, {"WGS",1024}, {"WPT",1} } },
}
},
{ // Intel GPUs
@@ -134,7 +134,7 @@ const Database::DatabaseEntry Database::XaxpyComplexSingle = {
{ "Intel(R) HD Graphics Skylake ULT GT2", { {"VW",2}, {"WGS",512}, {"WPT",1} } },
{ "Iris", { {"VW",2}, {"WGS",128}, {"WPT",1} } },
{ "Iris Pro", { {"VW",1}, {"WGS",256}, {"WPT",8} } },
- { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } },
+ { "default", { {"VW",1}, {"WGS",128}, {"WPT",8} } },
}
},
{ // Intel accelerators
@@ -157,7 +157,7 @@ const Database::DatabaseEntry Database::XaxpyComplexSingle = {
{ "GeForce GTX TITAN X", { {"VW",1}, {"WGS",512}, {"WPT",1} } },
{ "Tesla K20m", { {"VW",1}, {"WGS",128}, {"WPT",1} } },
{ "Tesla K40m", { {"VW",1}, {"WGS",128}, {"WPT",1} } },
- { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } },
+ { "default", { {"VW",1}, {"WGS",256}, {"WPT",1} } },
}
},
{ // Default
@@ -193,7 +193,7 @@ const Database::DatabaseEntry Database::XaxpyDouble = {
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"VW",1}, {"WGS",1024}, {"WPT",1} } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"VW",8}, {"WGS",64}, {"WPT",1} } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"VW",8}, {"WGS",2048}, {"WPT",1} } },
- { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } },
+ { "default", { {"VW",8}, {"WGS",512}, {"WPT",1} } },
}
},
{ // Intel accelerators
@@ -216,7 +216,7 @@ const Database::DatabaseEntry Database::XaxpyDouble = {
{ "GeForce GTX TITAN X", { {"VW",1}, {"WGS",512}, {"WPT",1} } },
{ "Tesla K20m", { {"VW",2}, {"WGS",128}, {"WPT",1} } },
{ "Tesla K40m", { {"VW",2}, {"WGS",128}, {"WPT",1} } },
- { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } },
+ { "default", { {"VW",1}, {"WGS",128}, {"WPT",1} } },
}
},
{ // Default
@@ -238,7 +238,7 @@ const Database::DatabaseEntry Database::XaxpyComplexDouble = {
{ "Oland", { {"VW",1}, {"WGS",256}, {"WPT",1} } },
{ "Pitcairn", { {"VW",1}, {"WGS",128}, {"WPT",1} } },
{ "Tahiti", { {"VW",1}, {"WGS",128}, {"WPT",1} } },
- { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } },
+ { "default", { {"VW",1}, {"WGS",128}, {"WPT",1} } },
}
},
{ // ARM GPUs
@@ -252,7 +252,7 @@ const Database::DatabaseEntry Database::XaxpyComplexDouble = {
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"VW",8}, {"WGS",128}, {"WPT",1} } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"VW",8}, {"WGS",512}, {"WPT",1} } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"VW",1}, {"WGS",256}, {"WPT",1} } },
- { "default", { {"VW",1}, {"WGS",128}, {"WPT",1} } },
+ { "default", { {"VW",4}, {"WGS",1024}, {"WPT",1} } },
}
},
{ // Intel accelerators
diff --git a/src/database/kernels/xdot.hpp b/src/database/kernels/xdot.hpp
index e36dd8ca..394c25de 100644
--- a/src/database/kernels/xdot.hpp
+++ b/src/database/kernels/xdot.hpp
@@ -41,7 +41,7 @@ const Database::DatabaseEntry Database::XdotSingle = {
{ "Oland", { {"WGS1",256}, {"WGS2",32} } },
{ "Pitcairn", { {"WGS1",128}, {"WGS2",32} } },
{ "Tahiti", { {"WGS1",128}, {"WGS2",32} } },
- { "default", { {"WGS1",128}, {"WGS2",32} } },
+ { "default", { {"WGS1",256}, {"WGS2",32} } },
}
},
{ // Intel CPUs
@@ -56,7 +56,7 @@ const Database::DatabaseEntry Database::XdotSingle = {
{ "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"WGS1",32}, {"WGS2",32} } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { {"WGS1",64}, {"WGS2",32} } },
{ "Iris Pro", { {"WGS1",512}, {"WGS2",64} } },
- { "default", { {"WGS1",32}, {"WGS2",32} } },
+ { "default", { {"WGS1",64}, {"WGS2",32} } },
}
},
{ // NVIDIA GPUs
@@ -70,7 +70,7 @@ const Database::DatabaseEntry Database::XdotSingle = {
{ "GeForce GTX 980", { {"WGS1",256}, {"WGS2",32} } },
{ "GeForce GTX TITAN X", { {"WGS1",256}, {"WGS2",32} } },
{ "Tesla K20m", { {"WGS1",1024}, {"WGS2",32} } },
- { "default", { {"WGS1",128}, {"WGS2",32} } },
+ { "default", { {"WGS1",256}, {"WGS2",256} } },
}
},
{ // Default
@@ -92,7 +92,7 @@ const Database::DatabaseEntry Database::XdotComplexSingle = {
{ "Oland", { {"WGS1",128}, {"WGS2",32} } },
{ "Pitcairn", { {"WGS1",256}, {"WGS2",32} } },
{ "Tahiti", { {"WGS1",64}, {"WGS2",32} } },
- { "default", { {"WGS1",64}, {"WGS2",32} } },
+ { "default", { {"WGS1",256}, {"WGS2",32} } },
}
},
{ // Intel CPUs
@@ -121,7 +121,7 @@ const Database::DatabaseEntry Database::XdotComplexSingle = {
{ "GeForce GTX 980", { {"WGS1",256}, {"WGS2",64} } },
{ "GeForce GTX TITAN X", { {"WGS1",256}, {"WGS2",32} } },
{ "Tesla K20m", { {"WGS1",512}, {"WGS2",32} } },
- { "default", { {"WGS1",64}, {"WGS2",32} } },
+ { "default", { {"WGS1",512}, {"WGS2",64} } },
}
},
{ // Default
@@ -143,7 +143,7 @@ const Database::DatabaseEntry Database::XdotDouble = {
{ "Oland", { {"WGS1",256}, {"WGS2",32} } },
{ "Pitcairn", { {"WGS1",128}, {"WGS2",32} } },
{ "Tahiti", { {"WGS1",256}, {"WGS2",32} } },
- { "default", { {"WGS1",64}, {"WGS2",32} } },
+ { "default", { {"WGS1",128}, {"WGS2",32} } },
}
},
{ // Intel CPUs
@@ -163,7 +163,7 @@ const Database::DatabaseEntry Database::XdotDouble = {
{ "GeForce GTX 980", { {"WGS1",128}, {"WGS2",32} } },
{ "GeForce GTX TITAN X", { {"WGS1",256}, {"WGS2",32} } },
{ "Tesla K20m", { {"WGS1",512}, {"WGS2",32} } },
- { "default", { {"WGS1",64}, {"WGS2",32} } },
+ { "default", { {"WGS1",256}, {"WGS2",64} } },
}
},
{ // Default
@@ -185,7 +185,7 @@ const Database::DatabaseEntry Database::XdotComplexDouble = {
{ "Oland", { {"WGS1",256}, {"WGS2",32} } },
{ "Pitcairn", { {"WGS1",256}, {"WGS2",32} } },
{ "Tahiti", { {"WGS1",256}, {"WGS2",32} } },
- { "default", { {"WGS1",64}, {"WGS2",32} } },
+ { "default", { {"WGS1",256}, {"WGS2",32} } },
}
},
{ // Intel CPUs
@@ -205,7 +205,7 @@ const Database::DatabaseEntry Database::XdotComplexDouble = {
{ "GeForce GTX 980", { {"WGS1",64}, {"WGS2",32} } },
{ "GeForce GTX TITAN X", { {"WGS1",128}, {"WGS2",32} } },
{ "Tesla K20m", { {"WGS1",128}, {"WGS2",32} } },
- { "default", { {"WGS1",64}, {"WGS2",32} } },
+ { "default", { {"WGS1",128}, {"WGS2",64} } },
}
},
{ // Default
diff --git a/src/database/kernels/xgemv.hpp b/src/database/kernels/xgemv.hpp
index 6d680b06..ba71893e 100644
--- a/src/database/kernels/xgemv.hpp
+++ b/src/database/kernels/xgemv.hpp
@@ -48,7 +48,7 @@ const Database::DatabaseEntry Database::XgemvSingle = {
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"WGS1",64}, {"WPT1",1} } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"WGS1",64}, {"WPT1",4} } },
- { "default", { {"WGS1",64}, {"WPT1",1} } },
+ { "default", { {"WGS1",64}, {"WPT1",4} } },
}
},
{ // Intel GPUs
@@ -81,7 +81,7 @@ const Database::DatabaseEntry Database::XgemvSingle = {
{ "GeForce GTX TITAN X", { {"WGS1",256}, {"WPT1",1} } },
{ "Tesla K20m", { {"WGS1",128}, {"WPT1",1} } },
{ "Tesla K40m", { {"WGS1",256}, {"WPT1",1} } },
- { "default", { {"WGS1",64}, {"WPT1",1} } },
+ { "default", { {"WGS1",256}, {"WPT1",1} } },
}
},
{ // Default
@@ -161,14 +161,14 @@ const Database::DatabaseEntry Database::XgemvDouble = {
{ "Oland", { {"WGS1",256}, {"WPT1",1} } },
{ "Pitcairn", { {"WGS1",256}, {"WPT1",1} } },
{ "Tahiti", { {"WGS1",256}, {"WPT1",1} } },
- { "default", { {"WGS1",64}, {"WPT1",1} } },
+ { "default", { {"WGS1",256}, {"WPT1",1} } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"WGS1",64}, {"WPT1",2} } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"WGS1",64}, {"WPT1",4} } },
- { "default", { {"WGS1",64}, {"WPT1",2} } },
+ { "default", { {"WGS1",64}, {"WPT1",4} } },
}
},
{ // Intel accelerators
@@ -191,7 +191,7 @@ const Database::DatabaseEntry Database::XgemvDouble = {
{ "GeForce GTX TITAN X", { {"WGS1",64}, {"WPT1",1} } },
{ "Tesla K20m", { {"WGS1",256}, {"WPT1",1} } },
{ "Tesla K40m", { {"WGS1",256}, {"WPT1",1} } },
- { "default", { {"WGS1",64}, {"WPT1",1} } },
+ { "default", { {"WGS1",128}, {"WPT1",1} } },
}
},
{ // Default
@@ -220,7 +220,7 @@ const Database::DatabaseEntry Database::XgemvComplexDouble = {
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"WGS1",64}, {"WPT1",1} } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"WGS1",64}, {"WPT1",4} } },
- { "default", { {"WGS1",64}, {"WPT1",1} } },
+ { "default", { {"WGS1",64}, {"WPT1",4} } },
}
},
{ // Intel accelerators
@@ -234,7 +234,7 @@ const Database::DatabaseEntry Database::XgemvComplexDouble = {
{ "GRID K520", { {"WGS1",128}, {"WPT1",1} } },
{ "GeForce GTX 480", { {"WGS1",64}, {"WPT1",1} } },
{ "GeForce GTX 670", { {"WGS1",128}, {"WPT1",1} } },
- { "default", { {"WGS1",64}, {"WPT1",1} } },
+ { "default", { {"WGS1",128}, {"WPT1",1} } },
}
},
{ // Default
diff --git a/src/database/kernels/xgemv_fast.hpp b/src/database/kernels/xgemv_fast.hpp
index 65b15030..7e948540 100644
--- a/src/database/kernels/xgemv_fast.hpp
+++ b/src/database/kernels/xgemv_fast.hpp
@@ -48,7 +48,7 @@ const Database::DatabaseEntry Database::XgemvFastSingle = {
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"VW2",4}, {"WGS2",128}, {"WPT2",4} } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"VW2",1}, {"WGS2",64}, {"WPT2",4} } },
- { "default", { {"VW2",1}, {"WGS2",64}, {"WPT2",4} } },
+ { "default", { {"VW2",4}, {"WGS2",64}, {"WPT2",4} } },
}
},
{ // Intel GPUs
@@ -58,7 +58,7 @@ const Database::DatabaseEntry Database::XgemvFastSingle = {
{ "Intel(R) HD Graphics Skylake ULT GT2", { {"VW2",1}, {"WGS2",256}, {"WPT2",1} } },
{ "Iris", { {"VW2",1}, {"WGS2",128}, {"WPT2",2} } },
{ "Iris Pro", { {"VW2",1}, {"WGS2",128}, {"WPT2",2} } },
- { "default", { {"VW2",1}, {"WGS2",128}, {"WPT2",1} } },
+ { "default", { {"VW2",2}, {"WGS2",64}, {"WPT2",2} } },
}
},
{ // Intel accelerators
@@ -81,7 +81,7 @@ const Database::DatabaseEntry Database::XgemvFastSingle = {
{ "GeForce GTX TITAN X", { {"VW2",1}, {"WGS2",64}, {"WPT2",1} } },
{ "Tesla K20m", { {"VW2",1}, {"WGS2",256}, {"WPT2",1} } },
{ "Tesla K40m", { {"VW2",1}, {"WGS2",256}, {"WPT2",1} } },
- { "default", { {"VW2",1}, {"WGS2",64}, {"WPT2",1} } },
+ { "default", { {"VW2",1}, {"WGS2",256}, {"WPT2",1} } },
}
},
{ // Default
@@ -120,7 +120,7 @@ const Database::DatabaseEntry Database::XgemvFastComplexSingle = {
{ "Intel(R) HD Graphics Skylake ULT GT2", { {"VW2",1}, {"WGS2",64}, {"WPT2",1} } },
{ "Iris", { {"VW2",1}, {"WGS2",64}, {"WPT2",1} } },
{ "Iris Pro", { {"VW2",4}, {"WGS2",128}, {"WPT2",4} } },
- { "default", { {"VW2",1}, {"WGS2",64}, {"WPT2",1} } },
+ { "default", { {"VW2",2}, {"WGS2",64}, {"WPT2",2} } },
}
},
{ // Intel accelerators
@@ -189,7 +189,7 @@ const Database::DatabaseEntry Database::XgemvFastDouble = {
{ "GeForce GTX TITAN X", { {"VW2",1}, {"WGS2",128}, {"WPT2",1} } },
{ "Tesla K20m", { {"VW2",1}, {"WGS2",128}, {"WPT2",1} } },
{ "Tesla K40m", { {"VW2",1}, {"WGS2",256}, {"WPT2",1} } },
- { "default", { {"VW2",1}, {"WGS2",64}, {"WPT2",1} } },
+ { "default", { {"VW2",1}, {"WGS2",256}, {"WPT2",1} } },
}
},
{ // Default
diff --git a/src/database/kernels/xgemv_fast_rot.hpp b/src/database/kernels/xgemv_fast_rot.hpp
index 9822fb20..a57dcc66 100644
--- a/src/database/kernels/xgemv_fast_rot.hpp
+++ b/src/database/kernels/xgemv_fast_rot.hpp
@@ -32,7 +32,7 @@ const Database::DatabaseEntry Database::XgemvFastRotSingle = {
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics Skylake ULT GT2", { {"VW3",4}, {"WGS3",128}, {"WPT3",16} } },
{ "Iris Pro", { {"VW3",4}, {"WGS3",32}, {"WPT3",16} } },
- { "default", { {"VW3",4}, {"WGS3",32}, {"WPT3",16} } },
+ { "default", { {"VW3",4}, {"WGS3",128}, {"WPT3",16} } },
}
},
{ // NVIDIA GPUs
@@ -69,7 +69,7 @@ const Database::DatabaseEntry Database::XgemvFastRotComplexSingle = {
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics Skylake ULT GT2", { {"VW3",2}, {"WGS3",32}, {"WPT3",16} } },
{ "Iris Pro", { {"VW3",4}, {"WGS3",16}, {"WPT3",16} } },
- { "default", { {"VW3",2}, {"WGS3",16}, {"WPT3",16} } },
+ { "default", { {"VW3",2}, {"WGS3",32}, {"WPT3",8} } },
}
},
{ // Default
diff --git a/src/database/kernels/xger.hpp b/src/database/kernels/xger.hpp
index 216925fc..06f65c46 100644
--- a/src/database/kernels/xger.hpp
+++ b/src/database/kernels/xger.hpp
@@ -41,7 +41,7 @@ const Database::DatabaseEntry Database::XgerSingle = {
{ "Oland", { {"WGS1",32}, {"WGS2",4}, {"WPT",2} } },
{ "Pitcairn", { {"WGS1",64}, {"WGS2",1}, {"WPT",1} } },
{ "Tahiti", { {"WGS1",256}, {"WGS2",1}, {"WPT",1} } },
- { "default", { {"WGS1",32}, {"WGS2",1}, {"WPT",1} } },
+ { "default", { {"WGS1",32}, {"WGS2",4}, {"WPT",1} } },
}
},
{ // ARM GPUs
@@ -54,7 +54,7 @@ const Database::DatabaseEntry Database::XgerSingle = {
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"WGS1",128}, {"WGS2",2}, {"WPT",4} } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"WGS1",128}, {"WGS2",1}, {"WPT",4} } },
- { "default", { {"WGS1",128}, {"WGS2",1}, {"WPT",4} } },
+ { "default", { {"WGS1",512}, {"WGS2",1}, {"WPT",4} } },
}
},
{ // Intel GPUs
@@ -63,7 +63,7 @@ const Database::DatabaseEntry Database::XgerSingle = {
{ "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"WGS1",256}, {"WGS2",2}, {"WPT",2} } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { {"WGS1",8}, {"WGS2",8}, {"WPT",4} } },
{ "Iris Pro", { {"WGS1",64}, {"WGS2",1}, {"WPT",4} } },
- { "default", { {"WGS1",8}, {"WGS2",1}, {"WPT",2} } },
+ { "default", { {"WGS1",16}, {"WGS2",8}, {"WPT",4} } },
}
},
{ // NVIDIA GPUs
@@ -75,7 +75,7 @@ const Database::DatabaseEntry Database::XgerSingle = {
{ "GeForce GTX 680", { {"WGS1",128}, {"WGS2",1}, {"WPT",4} } },
{ "GeForce GTX 750", { {"WGS1",64}, {"WGS2",16}, {"WPT",4} } },
{ "GeForce GTX TITAN", { {"WGS1",32}, {"WGS2",4}, {"WPT",2} } },
- { "default", { {"WGS1",32}, {"WGS2",1}, {"WPT",1} } },
+ { "default", { {"WGS1",256}, {"WGS2",1}, {"WPT",4} } },
}
},
{ // Default
@@ -97,7 +97,7 @@ const Database::DatabaseEntry Database::XgerComplexSingle = {
{ "Oland", { {"WGS1",4}, {"WGS2",8}, {"WPT",1} } },
{ "Pitcairn", { {"WGS1",128}, {"WGS2",2}, {"WPT",1} } },
{ "Tahiti", { {"WGS1",64}, {"WGS2",2}, {"WPT",1} } },
- { "default", { {"WGS1",4}, {"WGS2",1}, {"WPT",1} } },
+ { "default", { {"WGS1",256}, {"WGS2",1}, {"WPT",1} } },
}
},
{ // ARM GPUs
@@ -110,7 +110,7 @@ const Database::DatabaseEntry Database::XgerComplexSingle = {
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"WGS1",256}, {"WGS2",1}, {"WPT",4} } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"WGS1",512}, {"WGS2",4}, {"WPT",2} } },
- { "default", { {"WGS1",256}, {"WGS2",1}, {"WPT",2} } },
+ { "default", { {"WGS1",512}, {"WGS2",1}, {"WPT",4} } },
}
},
{ // Intel GPUs
@@ -119,7 +119,7 @@ const Database::DatabaseEntry Database::XgerComplexSingle = {
{ "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"WGS1",128}, {"WGS2",4}, {"WPT",1} } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { {"WGS1",128}, {"WGS2",4}, {"WPT",2} } },
{ "Iris Pro", { {"WGS1",16}, {"WGS2",2}, {"WPT",4} } },
- { "default", { {"WGS1",16}, {"WGS2",1}, {"WPT",1} } },
+ { "default", { {"WGS1",32}, {"WGS2",1}, {"WPT",2} } },
}
},
{ // NVIDIA GPUs
@@ -131,7 +131,7 @@ const Database::DatabaseEntry Database::XgerComplexSingle = {
{ "GeForce GTX 680", { {"WGS1",32}, {"WGS2",4}, {"WPT",2} } },
{ "GeForce GTX 750", { {"WGS1",32}, {"WGS2",16}, {"WPT",4} } },
{ "GeForce GTX TITAN", { {"WGS1",16}, {"WGS2",16}, {"WPT",2} } },
- { "default", { {"WGS1",16}, {"WGS2",2}, {"WPT",2} } },
+ { "default", { {"WGS1",64}, {"WGS2",2}, {"WPT",2} } },
}
},
{ // Default
@@ -153,7 +153,7 @@ const Database::DatabaseEntry Database::XgerDouble = {
{ "Oland", { {"WGS1",128}, {"WGS2",1}, {"WPT",2} } },
{ "Pitcairn", { {"WGS1",64}, {"WGS2",1}, {"WPT",1} } },
{ "Tahiti", { {"WGS1",64}, {"WGS2",2}, {"WPT",1} } },
- { "default", { {"WGS1",32}, {"WGS2",1}, {"WPT",1} } },
+ { "default", { {"WGS1",64}, {"WGS2",2}, {"WPT",1} } },
}
},
{ // ARM GPUs
@@ -166,7 +166,7 @@ const Database::DatabaseEntry Database::XgerDouble = {
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"WGS1",512}, {"WGS2",16}, {"WPT",1} } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"WGS1",512}, {"WGS2",8}, {"WPT",2} } },
- { "default", { {"WGS1",512}, {"WGS2",8}, {"WPT",1} } },
+ { "default", { {"WGS1",512}, {"WGS2",1}, {"WPT",4} } },
}
},
{ // NVIDIA GPUs
@@ -178,7 +178,7 @@ const Database::DatabaseEntry Database::XgerDouble = {
{ "GeForce GTX 680", { {"WGS1",128}, {"WGS2",4}, {"WPT",2} } },
{ "GeForce GTX 750", { {"WGS1",256}, {"WGS2",2}, {"WPT",2} } },
{ "GeForce GTX TITAN", { {"WGS1",16}, {"WGS2",8}, {"WPT",2} } },
- { "default", { {"WGS1",16}, {"WGS2",2}, {"WPT",1} } },
+ { "default", { {"WGS1",256}, {"WGS2",2}, {"WPT",2} } },
}
},
{ // Default
@@ -200,7 +200,7 @@ const Database::DatabaseEntry Database::XgerComplexDouble = {
{ "Oland", { {"WGS1",16}, {"WGS2",16}, {"WPT",2} } },
{ "Pitcairn", { {"WGS1",64}, {"WGS2",4}, {"WPT",1} } },
{ "Tahiti", { {"WGS1",32}, {"WGS2",4}, {"WPT",1} } },
- { "default", { {"WGS1",16}, {"WGS2",1}, {"WPT",1} } },
+ { "default", { {"WGS1",32}, {"WGS2",4}, {"WPT",1} } },
}
},
{ // ARM GPUs
@@ -213,7 +213,7 @@ const Database::DatabaseEntry Database::XgerComplexDouble = {
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"WGS1",512}, {"WGS2",4}, {"WPT",2} } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"WGS1",256}, {"WGS2",1}, {"WPT",2} } },
- { "default", { {"WGS1",256}, {"WGS2",1}, {"WPT",2} } },
+ { "default", { {"WGS1",512}, {"WGS2",1}, {"WPT",4} } },
}
},
{ // NVIDIA GPUs
@@ -225,7 +225,7 @@ const Database::DatabaseEntry Database::XgerComplexDouble = {
{ "GeForce GTX 680", { {"WGS1",8}, {"WGS2",16}, {"WPT",1} } },
{ "GeForce GTX 750", { {"WGS1",8}, {"WGS2",32}, {"WPT",4} } },
{ "GeForce GTX TITAN", { {"WGS1",32}, {"WGS2",4}, {"WPT",2} } },
- { "default", { {"WGS1",8}, {"WGS2",2}, {"WPT",1} } },
+ { "default", { {"WGS1",16}, {"WGS2",8}, {"WPT",2} } },
}
},
{ // Default