diff options
-rwxr-xr-x | scripts/database/database.py | 3 | ||||
-rw-r--r-- | scripts/database/database/db.py | 30 | ||||
-rw-r--r-- | scripts/database/database/defaults.py | 10 | ||||
-rw-r--r-- | src/database/kernels/copy.hpp | 2 | ||||
-rw-r--r-- | src/database/kernels/xaxpy.hpp | 8 | ||||
-rw-r--r-- | src/database/kernels/xgemm.hpp | 2 | ||||
-rw-r--r-- | src/database/kernels/xgemv.hpp | 2 | ||||
-rw-r--r-- | src/database/kernels/xgemv_fast.hpp | 8 | ||||
-rw-r--r-- | src/tuning/kernels/xgemm.cpp | 85 |
9 files changed, 98 insertions, 52 deletions
diff --git a/scripts/database/database.py b/scripts/database/database.py index 6d370d99..5c859487 100755 --- a/scripts/database/database.py +++ b/scripts/database/database.py @@ -77,12 +77,13 @@ def main(argv): # Adds the new data to the database old_size = len(database.index) database = db.concatenate_database(database, imported_data) - database = db.remove_duplicates(database) + database = database.drop_duplicates() new_size = len(database.index) print("with " + str(new_size - old_size) + " new items") # Newline printed here # Stores the modified database back to disk if len(glob.glob(json_files)) >= 1: + database = db.remove_duplicates(database) io.save_database(database, database_filename) # Optional: update the database here. Default is disabled, code below is just an example diff --git a/scripts/database/database/db.py b/scripts/database/database/db.py index 60cfbcfa..6534d689 100644 --- a/scripts/database/database/db.py +++ b/scripts/database/database/db.py @@ -6,6 +6,7 @@ # Cedric Nugteren <www.cedricnugteren.nl> import pandas as pd +import numpy as np def get_entries_by_field(database, field, value): @@ -18,11 +19,6 @@ def concatenate_database(database1, database2): return pd.concat([database1, database2]) -def remove_duplicates(database): - """Removes duplicates from a database""" - return database.drop_duplicates() - - def find_and_replace(database, dictionary): """Finds and replaces entries in a database based on a dictionary. Example: dictionary = { "key_to_edit": { find1: replace1, find2, replace2 } }""" @@ -48,3 +44,27 @@ def update_database(database, condition, field, value): """Updates the database by writing a specific value to a given field, given certain conditions""" database.loc[condition, field] = value return database + + +def remove_duplicates(database): + """Removes duplicates from the database based on all but the 'time' column""" + + # First remove 100% duplicate entries + database = database.drop_duplicates() + + # Replace NaNs with -1 first (needed for groupby) + database = database.replace(np.nan, -1) + + # In case multiple runs for the exact same configuration where made: take just the best performing one into account + other_column_names = list(database.columns.values) + other_column_names.remove("time") + database_by_time = database.groupby(other_column_names,) + num_removals = len(database) - len(database_by_time) + if num_removals > 0: + print("[database] Removing %d entries: keeping only those with the lowest execution time" % num_removals) + print("[database] Note: this might take a while") + database = database_by_time.apply(lambda x: x[x["time"] == x["time"].min()]) + + # Re-replace the NaN values + database = database.replace(-1, np.nan) + return database diff --git a/scripts/database/database/defaults.py b/scripts/database/database/defaults.py index 3bde33c1..d71e604f 100644 --- a/scripts/database/database/defaults.py +++ b/scripts/database/database/defaults.py @@ -81,16 +81,6 @@ def get_common_best(database, group_name, verbose): # Removes columns without any values database = database.dropna(axis=1, how='all') - database = database.reset_index() - - # In case multiple runs for the exact same configuration where made: take just the best performing one into account - other_column_names = list(database.columns.values) - other_column_names.remove("time") - database_by_time = database.groupby(other_column_names) - if len(database_by_time) != len(database): - if verbose: - print("[database] " + str(group_name) + " keeping only entries with the lowest execution time") - database = database_by_time.apply(lambda x: x[x["time"] == x["time"].min()]) # Inserts the relative execution times into the database def relative_performance(x): diff --git a/src/database/kernels/copy.hpp b/src/database/kernels/copy.hpp index dc2011fd..a6b7dfe8 100644 --- a/src/database/kernels/copy.hpp +++ b/src/database/kernels/copy.hpp @@ -87,7 +87,7 @@ const Database::DatabaseEntry Database::CopySingle = { { "GeForce GTX 750 Ti", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } }, { "GeForce GTX 980", { {"COPY_DIMX",32}, {"COPY_DIMY",16}, {"COPY_VW",1}, {"COPY_WPT",1} } }, { "GeForce GTX TITAN", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",4} } }, - { "GeForce GTX TITAN X", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } }, + { "GeForce GTX TITAN X", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } }, { "Tesla K20m", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",4} } }, { "Tesla K40m", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",2} } }, { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",16}, {"COPY_VW",4}, {"COPY_WPT",1} } }, diff --git a/src/database/kernels/xaxpy.hpp b/src/database/kernels/xaxpy.hpp index 60fa7555..6e84ca5a 100644 --- a/src/database/kernels/xaxpy.hpp +++ b/src/database/kernels/xaxpy.hpp @@ -79,10 +79,10 @@ const Database::DatabaseEntry Database::XaxpySingle = { { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { { "GRID K520", { {"VW",2}, {"WGS",64}, {"WPT",1} } }, - { "GeForce GTX 1070", { {"VW",2}, {"WGS",64}, {"WPT",1} } }, - { "GeForce GTX 480", { {"VW",4}, {"WGS",64}, {"WPT",1} } }, + { "GeForce GTX 1070", { {"VW",1}, {"WGS",64}, {"WPT",4} } }, + { "GeForce GTX 480", { {"VW",2}, {"WGS",128}, {"WPT",1} } }, { "GeForce GTX 670", { {"VW",2}, {"WGS",64}, {"WPT",1} } }, - { "GeForce GTX 680", { {"VW",2}, {"WGS",64}, {"WPT",1} } }, + { "GeForce GTX 680", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, { "GeForce GTX 750", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, { "GeForce GTX 750 Ti", { {"VW",1}, {"WGS",1024}, {"WPT",1} } }, { "GeForce GTX 980", { {"VW",1}, {"WGS",1024}, {"WPT",1} } }, @@ -209,7 +209,7 @@ const Database::DatabaseEntry Database::XaxpyDouble = { kDeviceTypeGPU, "NVIDIA", { { "GRID K520", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, { "GeForce GTX 1070", { {"VW",1}, {"WGS",64}, {"WPT",8} } }, - { "GeForce GTX 480", { {"VW",2}, {"WGS",64}, {"WPT",1} } }, + { "GeForce GTX 480", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, { "GeForce GTX 670", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, { "GeForce GTX 680", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, { "GeForce GTX 750", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, diff --git a/src/database/kernels/xgemm.hpp b/src/database/kernels/xgemm.hpp index c960592d..cc81cf6a 100644 --- a/src/database/kernels/xgemm.hpp +++ b/src/database/kernels/xgemm.hpp @@ -158,7 +158,7 @@ const Database::DatabaseEntry Database::XgemmDouble = { "Xgemm", Precision::kDouble, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { {"KWG",32}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",32}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",1}, {"VWM",2}, {"VWN",8} } }, + { "AMD Radeon R9 M370X Compute Engine", { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",4}, {"VWN",4} } }, { "Hawaii", { {"KWG",16}, {"KWI",8}, {"MDIMA",32}, {"MDIMC",8}, {"MWG",128}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",32}, {"SA",0}, {"SB",1}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",4} } }, { "Oland", { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",16}, {"NDIMC",8}, {"NWG",16}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",1}, {"VWM",1}, {"VWN",1} } }, { "Pitcairn", { {"KWG",32}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",16}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",2} } }, diff --git a/src/database/kernels/xgemv.hpp b/src/database/kernels/xgemv.hpp index 7e8e64e3..03e84525 100644 --- a/src/database/kernels/xgemv.hpp +++ b/src/database/kernels/xgemv.hpp @@ -88,7 +88,7 @@ const Database::DatabaseEntry Database::XgemvSingle = { }, { // Default kDeviceTypeAll, "default", { - { "default", { {"WGS1",64}, {"WPT1",4} } }, + { "default", { {"WGS1",64}, {"WPT1",1} } }, } }, } diff --git a/src/database/kernels/xgemv_fast.hpp b/src/database/kernels/xgemv_fast.hpp index f5e3e630..c12fcdca 100644 --- a/src/database/kernels/xgemv_fast.hpp +++ b/src/database/kernels/xgemv_fast.hpp @@ -60,7 +60,7 @@ const Database::DatabaseEntry Database::XgemvFastSingle = { { "Intel(R) HD Graphics Skylake ULT GT2", { {"VW2",1}, {"WGS2",256}, {"WPT2",1} } }, { "Iris", { {"VW2",1}, {"WGS2",128}, {"WPT2",2} } }, { "Iris Pro", { {"VW2",1}, {"WGS2",128}, {"WPT2",2} } }, - { "default", { {"VW2",1}, {"WGS2",32}, {"WPT2",1} } }, + { "default", { {"VW2",2}, {"WGS2",64}, {"WPT2",2} } }, } }, { // Intel accelerators @@ -88,7 +88,7 @@ const Database::DatabaseEntry Database::XgemvFastSingle = { }, { // Default kDeviceTypeAll, "default", { - { "default", { {"VW2",4}, {"WGS2",128}, {"WPT2",4} } }, + { "default", { {"VW2",1}, {"WGS2",64}, {"WPT2",1} } }, } }, } @@ -123,7 +123,7 @@ const Database::DatabaseEntry Database::XgemvFastComplexSingle = { { "Intel(R) HD Graphics Skylake ULT GT2", { {"VW2",1}, {"WGS2",64}, {"WPT2",1} } }, { "Iris", { {"VW2",1}, {"WGS2",64}, {"WPT2",1} } }, { "Iris Pro", { {"VW2",4}, {"WGS2",128}, {"WPT2",4} } }, - { "default", { {"VW2",1}, {"WGS2",32}, {"WPT2",1} } }, + { "default", { {"VW2",1}, {"WGS2",64}, {"WPT2",1} } }, } }, { // Intel accelerators @@ -145,7 +145,7 @@ const Database::DatabaseEntry Database::XgemvFastComplexSingle = { }, { // Default kDeviceTypeAll, "default", { - { "default", { {"VW2",1}, {"WGS2",32}, {"WPT2",1} } }, + { "default", { {"VW2",1}, {"WGS2",64}, {"WPT2",1} } }, } }, } diff --git a/src/tuning/kernels/xgemm.cpp b/src/tuning/kernels/xgemm.cpp index eb7c8a66..7c9ac76a 100644 --- a/src/tuning/kernels/xgemm.cpp +++ b/src/tuning/kernels/xgemm.cpp @@ -7,7 +7,9 @@ // Author(s): // Cedric Nugteren <www.cedricnugteren.nl> // -// This file uses the CLTune auto-tuner to tune the xgemm OpenCL kernels. +// This file uses the CLTune auto-tuner to tune the xgemm OpenCL kernels. There are two variations: +// - V==1: This tests some limited set of tuning parameters exhaustively. +// - V==2: This tests a much larger set of tuning parameters by randomly sampling a subset. // // ================================================================================================= @@ -21,12 +23,12 @@ namespace clblast { // ================================================================================================= // See comment at top of file for a description of the class -template <typename T> +template <typename T, int V> class TuneXgemm { public: // The representative kernel and the source code - static std::string KernelFamily() { return "xgemm"; } + static std::string KernelFamily() { return (V==1) ? "xgemm_1" : "xgemm_2"; } static std::string KernelName() { return "Xgemm"; } static std::string GetSources() { return @@ -48,7 +50,7 @@ class TuneXgemm { static size_t DefaultM() { return 1024; } static size_t DefaultN() { return 1024; } static size_t DefaultK() { return 1024; } - static double DefaultFraction() { return 256.0; } + static double DefaultFraction() { return (V==1) ? 1.0 : 512.0; } // test all or sample randomly // Describes how to obtain the sizes of the buffers static size_t GetSizeX(const Arguments<T> &) { return 1; } // N/A for this kernel @@ -60,20 +62,38 @@ class TuneXgemm { // Sets the tuning parameters and their possible values static void SetParameters(cltune::Tuner &tuner, const size_t id) { - tuner.AddParameter(id, "MWG", {16, 32, 64, 128}); - tuner.AddParameter(id, "NWG", {16, 32, 64, 128}); - tuner.AddParameter(id, "KWG", {16, 32}); - tuner.AddParameter(id, "MDIMC", {8, 16, 32}); - tuner.AddParameter(id, "NDIMC", {8, 16, 32}); - tuner.AddParameter(id, "MDIMA", {8, 16, 32}); - tuner.AddParameter(id, "NDIMB", {8, 16, 32}); - tuner.AddParameter(id, "KWI", {2}); - tuner.AddParameter(id, "VWM", {1, 2, 4}); - tuner.AddParameter(id, "VWN", {1, 2, 4}); - tuner.AddParameter(id, "STRM", {0, 1}); - tuner.AddParameter(id, "STRN", {0, 1}); - tuner.AddParameter(id, "SA", {0, 1}); - tuner.AddParameter(id, "SB", {0, 1}); + if (V==1) { // limited subset of tuning parameters - but explorable exhaustively + tuner.AddParameter(id, "MWG", {16, 32, 64}); + tuner.AddParameter(id, "NWG", {16, 32, 64}); + tuner.AddParameter(id, "KWG", {32}); + tuner.AddParameter(id, "MDIMC", {8, 16, 32}); + tuner.AddParameter(id, "NDIMC", {8, 16, 32}); + tuner.AddParameter(id, "MDIMA", {8, 16, 32}); + tuner.AddParameter(id, "NDIMB", {8, 16, 32}); + tuner.AddParameter(id, "KWI", {2}); + tuner.AddParameter(id, "VWM", {1, 2, 4}); + tuner.AddParameter(id, "VWN", {1, 2, 4}); + tuner.AddParameter(id, "STRM", {0}); + tuner.AddParameter(id, "STRN", {0}); + tuner.AddParameter(id, "SA", {0, 1}); + tuner.AddParameter(id, "SB", {0, 1}); + } // a lot more tuning parameters - has to be sampled randomly, too much to test all + else { + tuner.AddParameter(id, "MWG", {16, 32, 64, 128}); + tuner.AddParameter(id, "NWG", {16, 32, 64, 128}); + tuner.AddParameter(id, "KWG", {16, 32}); + tuner.AddParameter(id, "MDIMC", {8, 16, 32}); + tuner.AddParameter(id, "NDIMC", {8, 16, 32}); + tuner.AddParameter(id, "MDIMA", {8, 16, 32}); + tuner.AddParameter(id, "NDIMB", {8, 16, 32}); + tuner.AddParameter(id, "KWI", {2}); + tuner.AddParameter(id, "VWM", {1, 2, 4, 8}); + tuner.AddParameter(id, "VWN", {1, 2, 4, 8}); + tuner.AddParameter(id, "STRM", {0, 1}); + tuner.AddParameter(id, "STRN", {0, 1}); + tuner.AddParameter(id, "SA", {0, 1}); + tuner.AddParameter(id, "SB", {0, 1}); + } } // Sets the constraints @@ -92,6 +112,14 @@ class TuneXgemm { // KWG has to be a multiple of KDIMA = ((MDIMC*NDIMC)/(MDIMA)) and KDIMB = (...) tuner.AddConstraint(id, MultipleOfXMulYDivZ, {"KWG", "MDIMC", "NDIMC", "MDIMA"}); tuner.AddConstraint(id, MultipleOfXMulYDivZ, {"KWG", "MDIMC", "NDIMC", "NDIMB"}); + + // Extra constraints for variation 1 to limit the set of options significantly + if (V==1) { + auto IsEqual = [] (std::vector<size_t> v) { return v[0] == v[1]; }; + tuner.AddConstraint(id, IsEqual, {"MDIMC", "MDIMA"}); + tuner.AddConstraint(id, IsEqual, {"NDIMC", "NDIMB"}); + tuner.AddConstraint(id, IsEqual, {"SA", "SB"}); + } } // Sets the local memory size @@ -145,15 +173,22 @@ class TuneXgemm { using float2 = clblast::float2; using double2 = clblast::double2; -// Main function (not within the clblast namespace) -int main(int argc, char *argv[]) { +// Function to tune a specific variation V (not within the clblast namespace) +template <int V> +void StartVariation(int argc, char *argv[]) { switch(clblast::GetPrecision(argc, argv)) { - case clblast::Precision::kHalf: clblast::Tuner<clblast::TuneXgemm<half>, half>(argc, argv); break; - case clblast::Precision::kSingle: clblast::Tuner<clblast::TuneXgemm<float>, float>(argc, argv); break; - case clblast::Precision::kDouble: clblast::Tuner<clblast::TuneXgemm<double>, double>(argc, argv); break; - case clblast::Precision::kComplexSingle: clblast::Tuner<clblast::TuneXgemm<float2>, float2>(argc, argv); break; - case clblast::Precision::kComplexDouble: clblast::Tuner<clblast::TuneXgemm<double2>, double2>(argc, argv); break; + case clblast::Precision::kHalf: clblast::Tuner<clblast::TuneXgemm<half,V>, half>(argc, argv); break; + case clblast::Precision::kSingle: clblast::Tuner<clblast::TuneXgemm<float,V>, float>(argc, argv); break; + case clblast::Precision::kDouble: clblast::Tuner<clblast::TuneXgemm<double,V>, double>(argc, argv); break; + case clblast::Precision::kComplexSingle: clblast::Tuner<clblast::TuneXgemm<float2,V>, float2>(argc, argv); break; + case clblast::Precision::kComplexDouble: clblast::Tuner<clblast::TuneXgemm<double2,V>, double2>(argc, argv); break; } +} + +// Main function (not within the clblast namespace) +int main(int argc, char *argv[]) { + StartVariation<1>(argc, argv); + StartVariation<2>(argc, argv); return 0; } |