diff options
-rw-r--r-- | include/internal/database.h | 11 | ||||
-rw-r--r-- | include/internal/database/copy.h | 106 | ||||
-rw-r--r-- | include/internal/database/pad.h | 106 | ||||
-rw-r--r-- | include/internal/database/padtranspose.h | 114 | ||||
-rw-r--r-- | include/internal/database/transpose.h | 110 | ||||
-rw-r--r-- | include/internal/database/xaxpy.h | 107 | ||||
-rw-r--r-- | include/internal/database/xdot.h | 91 | ||||
-rw-r--r-- | include/internal/database/xgemm.h | 111 | ||||
-rw-r--r-- | include/internal/database/xgemv.h | 101 | ||||
-rw-r--r-- | scripts/database/database.db | bin | 2093996 -> 2094065 bytes | |||
-rw-r--r-- | scripts/database/database.py | 42 | ||||
-rw-r--r-- | src/database.cc | 8 | ||||
-rw-r--r-- | src/routines/level3/xgemm.cc | 2 | ||||
-rw-r--r-- | src/routines/level3/xher2k.cc | 2 | ||||
-rw-r--r-- | src/routines/level3/xherk.cc | 2 | ||||
-rw-r--r-- | src/routines/level3/xsyr2k.cc | 2 | ||||
-rw-r--r-- | src/routines/level3/xsyrk.cc | 2 |
17 files changed, 351 insertions, 566 deletions
diff --git a/include/internal/database.h b/include/internal/database.h index 1ac0e646..9107f978 100644 --- a/include/internal/database.h +++ b/include/internal/database.h @@ -56,15 +56,8 @@ class Database { static constexpr auto kDeviceTypeAll = "default"; // The OpenCL device vendors - static constexpr auto kDeviceVendorNVIDIA = "NVIDIA Corporation"; - static constexpr auto kDeviceVendorAMD = "Advanced Micro Devices, Inc."; - static constexpr auto kDeviceVendorIntel = "Intel"; static constexpr auto kDeviceVendorAll = "default"; - // The OpenCL device names - static constexpr auto kDefaultDevice = "default"; - - // The database consists of separate database entries, stored together in a vector static const DatabaseEntry XaxpySingle, XaxpyDouble, XaxpyComplexSingle, XaxpyComplexDouble; static const DatabaseEntry XdotSingle, XdotDouble, XdotComplexSingle, XdotComplexDouble; @@ -72,8 +65,8 @@ class Database { static const DatabaseEntry XgemmSingle, XgemmDouble, XgemmComplexSingle, XgemmComplexDouble; static const DatabaseEntry CopySingle, CopyDouble, CopyComplexSingle, CopyComplexDouble; static const DatabaseEntry PadSingle, PadDouble, PadComplexSingle, PadComplexDouble; - static const DatabaseEntry TraSingle, TraDouble, TraComplexSingle, TraComplexDouble; - static const DatabaseEntry PadTraSingle, PadTraDouble, PadTraComplexSingle, PadTraComplexDouble; + static const DatabaseEntry TransposeSingle, TransposeDouble, TransposeComplexSingle, TransposeComplexDouble; + static const DatabaseEntry PadtransposeSingle, PadtransposeDouble, PadtransposeComplexSingle, PadtransposeComplexDouble; static const std::vector<DatabaseEntry> database; // The constructor diff --git a/include/internal/database/copy.h b/include/internal/database/copy.h index 541a352b..3bd85fa6 100644 --- a/include/internal/database/copy.h +++ b/include/internal/database/copy.h @@ -5,9 +5,9 @@ // width of 100 characters per line. // // Author(s): -// Cedric Nugteren <www.cedricnugteren.nl> +// Database generator <database.py> // -// This file populates the database with best-found tuning parameters for the Copy kernels. +// This file populates the database with best-found tuning parameters for the 'Copy' kernels. // // ================================================================================================= @@ -16,26 +16,21 @@ namespace clblast { const Database::DatabaseEntry Database::CopySingle = { "Copy", Precision::kSingle, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",2} } }, - { "Tesla K20m", { {"COPY_DIMX",8}, {"COPY_DIMY",16}, {"COPY_WPT",2}, {"COPY_VW",4} } }, - { "Tesla K40m", { {"COPY_DIMX",16}, {"COPY_DIMY",16}, {"COPY_WPT",4}, {"COPY_VW",4} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_WPT",4}, {"COPY_VW",2} } }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Iris", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } }, + { "default", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } }, } }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { - { "Iris", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",4} } }, + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",2} } }, + { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",2} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",1} } }, + kDeviceTypeAll, "default", { + { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } }, } }, } @@ -43,27 +38,23 @@ const Database::DatabaseEntry Database::CopySingle = { // ================================================================================================= -const Database::DatabaseEntry Database::CopyDouble = { - "Copy", Precision::kDouble, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",1} } }, - { "Tesla K20m", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",2} } }, - { "Tesla K40m", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",2} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_WPT",2}, {"COPY_VW",4} } }, +const Database::DatabaseEntry Database::CopyComplexSingle = { + "Copy", Precision::kComplexSingle, { + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Iris", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } }, + { "default", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } }, } }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, + { "default", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",1} } }, + kDeviceTypeAll, "default", { + { "default", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, } }, } @@ -71,28 +62,17 @@ const Database::DatabaseEntry Database::CopyDouble = { // ================================================================================================= -const Database::DatabaseEntry Database::CopyComplexSingle = { - "Copy", Precision::kComplexSingle, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"COPY_DIMX",16}, {"COPY_DIMY",16}, {"COPY_WPT",1}, {"COPY_VW",1} } }, - { "Tesla K20m", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_WPT",2}, {"COPY_VW",1} } }, - { "Tesla K40m", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",1} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",1} } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { - { "Iris", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",1} } }, +const Database::DatabaseEntry Database::CopyDouble = { + "Copy", Precision::kDouble, { + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",2} } }, + { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",2} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",1} } }, + kDeviceTypeAll, "default", { + { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",2} } }, } }, } @@ -102,25 +82,15 @@ const Database::DatabaseEntry Database::CopyComplexSingle = { const Database::DatabaseEntry Database::CopyComplexDouble = { "Copy", Precision::kComplexDouble, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",1} } }, - { "Tesla K20m", { {"COPY_DIMX",8}, {"COPY_DIMY",32}, {"COPY_WPT",1}, {"COPY_VW",1} } }, - { "Tesla K40m", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",1} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"COPY_DIMX",8}, {"COPY_DIMY",32}, {"COPY_WPT",4}, {"COPY_VW",2} } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, + { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",1} } }, + kDeviceTypeAll, "default", { + { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, } }, } diff --git a/include/internal/database/pad.h b/include/internal/database/pad.h index 4a599648..d833a934 100644 --- a/include/internal/database/pad.h +++ b/include/internal/database/pad.h @@ -5,9 +5,9 @@ // width of 100 characters per line. // // Author(s): -// Cedric Nugteren <www.cedricnugteren.nl> +// Database generator <database.py> // -// This file populates the database with best-found tuning parameters for the Pad kernels. +// This file populates the database with best-found tuning parameters for the 'Pad' kernels. // // ================================================================================================= @@ -16,26 +16,21 @@ namespace clblast { const Database::DatabaseEntry Database::PadSingle = { "Pad", Precision::kSingle, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",4} } }, - { "Tesla K20m", { {"PAD_DIMX",16}, {"PAD_DIMY",32}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, - { "Tesla K40m", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Iris", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, } }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { - { "Iris", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + kDeviceTypeAll, "default", { + { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, } }, } @@ -43,27 +38,23 @@ const Database::DatabaseEntry Database::PadSingle = { // ================================================================================================= -const Database::DatabaseEntry Database::PadDouble = { - "Pad", Precision::kDouble, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"PAD_DIMX",16}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, - { "Tesla K20m", { {"PAD_DIMX",16}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, - { "Tesla K40m", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, +const Database::DatabaseEntry Database::PadComplexSingle = { + "Pad", Precision::kComplexSingle, { + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Iris", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",2}, {"PAD_WPTY",4} } }, + { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",2}, {"PAD_WPTY",4} } }, } }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + kDeviceTypeAll, "default", { + { "default", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, } }, } @@ -71,28 +62,17 @@ const Database::DatabaseEntry Database::PadDouble = { // ================================================================================================= -const Database::DatabaseEntry Database::PadComplexSingle = { - "Pad", Precision::kComplexSingle, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"PAD_DIMX",16}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, - { "Tesla K20m", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, - { "Tesla K40m", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { - { "Iris", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, +const Database::DatabaseEntry Database::PadDouble = { + "Pad", Precision::kDouble, { + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, + { "default", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + kDeviceTypeAll, "default", { + { "default", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, } }, } @@ -102,25 +82,15 @@ const Database::DatabaseEntry Database::PadComplexSingle = { const Database::DatabaseEntry Database::PadComplexDouble = { "Pad", Precision::kComplexDouble, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, - { "Tesla K20m", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, - { "Tesla K40m", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"PAD_DIMX",8}, {"PAD_DIMY",16}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + kDeviceTypeAll, "default", { + { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, } }, } diff --git a/include/internal/database/padtranspose.h b/include/internal/database/padtranspose.h index 53226c1d..dacc693f 100644 --- a/include/internal/database/padtranspose.h +++ b/include/internal/database/padtranspose.h @@ -5,37 +5,32 @@ // width of 100 characters per line. // // Author(s): -// Cedric Nugteren <www.cedricnugteren.nl> +// Database generator <database.py> // -// This file populates the database with best-found tuning parameters for the PadTranspose kernels. +// This file populates the database with best-found tuning parameters for the 'Padtranspose' kernels. // // ================================================================================================= namespace clblast { // ================================================================================================= -const Database::DatabaseEntry Database::PadTraSingle = { - "PadTranspose", Precision::kSingle, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"PADTRA_TILE",16}, {"PADTRA_WPT",2}, {"PADTRA_PAD",1} } }, - { "Tesla K20m", { {"PADTRA_TILE",16}, {"PADTRA_WPT",2}, {"PADTRA_PAD",1} } }, - { "Tesla K40m", { {"PADTRA_TILE",32}, {"PADTRA_WPT",2}, {"PADTRA_PAD",1} } }, +const Database::DatabaseEntry Database::PadtransposeSingle = { + "Padtranspose", Precision::kSingle, { + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Iris", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, + { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, } }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"PADTRA_TILE",16}, {"PADTRA_WPT",4}, {"PADTRA_PAD",0} } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { - { "Iris", { {"PADTRA_TILE",16}, {"PADTRA_WPT",2}, {"PADTRA_PAD",0} } }, + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",2} } }, + { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",2} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",0} } }, + kDeviceTypeAll, "default", { + { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, } }, } @@ -43,27 +38,23 @@ const Database::DatabaseEntry Database::PadTraSingle = { // ================================================================================================= -const Database::DatabaseEntry Database::PadTraDouble = { - "PadTranspose", Precision::kDouble, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",1} } }, - { "Tesla K20m", { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",1} } }, - { "Tesla K40m", { {"PADTRA_TILE",16}, {"PADTRA_WPT",2}, {"PADTRA_PAD",1} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"PADTRA_TILE",8}, {"PADTRA_WPT",4}, {"PADTRA_PAD",0} } }, +const Database::DatabaseEntry Database::PadtransposeComplexSingle = { + "Padtranspose", Precision::kComplexSingle, { + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Iris", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, + { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, } }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, + { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",0} } }, + kDeviceTypeAll, "default", { + { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, } }, } @@ -71,28 +62,17 @@ const Database::DatabaseEntry Database::PadTraDouble = { // ================================================================================================= -const Database::DatabaseEntry Database::PadTraComplexSingle = { - "PadTranspose", Precision::kComplexSingle, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",1} } }, - { "Tesla K20m", { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",1} } }, - { "Tesla K40m", { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",0} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"PADTRA_TILE",16}, {"PADTRA_WPT",2}, {"PADTRA_PAD",0} } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { - { "Iris", { {"PADTRA_TILE",16}, {"PADTRA_WPT",2}, {"PADTRA_PAD",0} } }, +const Database::DatabaseEntry Database::PadtransposeDouble = { + "Padtranspose", Precision::kDouble, { + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, + { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",0} } }, + kDeviceTypeAll, "default", { + { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, } }, } @@ -100,27 +80,17 @@ const Database::DatabaseEntry Database::PadTraComplexSingle = { // ================================================================================================= -const Database::DatabaseEntry Database::PadTraComplexDouble = { - "PadTranspose", Precision::kComplexDouble, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",1} } }, - { "Tesla K20m", { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",1} } }, - { "Tesla K40m", { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",1} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"PADTRA_TILE",8}, {"PADTRA_WPT",2}, {"PADTRA_PAD",1} } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { +const Database::DatabaseEntry Database::PadtransposeComplexDouble = { + "Padtranspose", Precision::kComplexDouble, { + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, + { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",0} } }, + kDeviceTypeAll, "default", { + { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, } }, } diff --git a/include/internal/database/transpose.h b/include/internal/database/transpose.h index 1d12a13e..46a38bc2 100644 --- a/include/internal/database/transpose.h +++ b/include/internal/database/transpose.h @@ -5,37 +5,32 @@ // width of 100 characters per line. // // Author(s): -// Cedric Nugteren <www.cedricnugteren.nl> +// Database generator <database.py> // -// This file populates the database with best-found tuning parameters for the Transpose kernels. +// This file populates the database with best-found tuning parameters for the 'Transpose' kernels. // // ================================================================================================= namespace clblast { // ================================================================================================= -const Database::DatabaseEntry Database::TraSingle = { +const Database::DatabaseEntry Database::TransposeSingle = { "Transpose", Precision::kSingle, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"TRA_DIM",16}, {"TRA_WPT",2}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0} } }, - { "Tesla K20m", { {"TRA_DIM",16}, {"TRA_WPT",2}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0} } }, - { "Tesla K40m", { {"TRA_DIM",16}, {"TRA_WPT",2}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"TRA_DIM",16}, {"TRA_WPT",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1} } }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Iris", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, + { "default", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, } }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { - { "Iris", { {"TRA_DIM",8}, {"TRA_WPT",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0} } }, + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, + { "default", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0} } }, + kDeviceTypeAll, "default", { + { "default", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, } }, } @@ -43,27 +38,23 @@ const Database::DatabaseEntry Database::TraSingle = { // ================================================================================================= -const Database::DatabaseEntry Database::TraDouble = { - "Transpose", Precision::kDouble, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"TRA_DIM",8}, {"TRA_WPT",2}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0} } }, - { "Tesla K20m", { {"TRA_DIM",16}, {"TRA_WPT",2}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0} } }, - { "Tesla K40m", { {"TRA_DIM",16}, {"TRA_WPT",2}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"TRA_DIM",16}, {"TRA_WPT",2}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1} } }, +const Database::DatabaseEntry Database::TransposeComplexSingle = { + "Transpose", Precision::kComplexSingle, { + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Iris", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } }, + { "default", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } }, } }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, + { "default", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0} } }, + kDeviceTypeAll, "default", { + { "default", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, } }, } @@ -71,28 +62,17 @@ const Database::DatabaseEntry Database::TraDouble = { // ================================================================================================= -const Database::DatabaseEntry Database::TraComplexSingle = { - "Transpose", Precision::kComplexSingle, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0} } }, - { "Tesla K20m", { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0} } }, - { "Tesla K40m", { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"TRA_DIM",16}, {"TRA_WPT",2}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1} } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { - { "Iris", { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0} } }, +const Database::DatabaseEntry Database::TransposeDouble = { + "Transpose", Precision::kDouble, { + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } }, + { "default", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0} } }, + kDeviceTypeAll, "default", { + { "default", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } }, } }, } @@ -100,27 +80,17 @@ const Database::DatabaseEntry Database::TraComplexSingle = { // ================================================================================================= -const Database::DatabaseEntry Database::TraComplexDouble = { +const Database::DatabaseEntry Database::TransposeComplexDouble = { "Transpose", Precision::kComplexDouble, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"TRA_DIM",8}, {"TRA_WPT",1}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0} } }, - { "Tesla K20m", { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0} } }, - { "Tesla K40m", { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1} } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, + { "default", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0} } }, + kDeviceTypeAll, "default", { + { "default", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, } }, } diff --git a/include/internal/database/xaxpy.h b/include/internal/database/xaxpy.h index 058e3c0a..783e142d 100644 --- a/include/internal/database/xaxpy.h +++ b/include/internal/database/xaxpy.h @@ -5,9 +5,9 @@ // width of 100 characters per line. // // Author(s): -// Cedric Nugteren <www.cedricnugteren.nl> +// Database generator <database.py> // -// This file populates the database with best-found tuning parameters for the Xaxpy kernels. +// This file populates the database with best-found tuning parameters for the 'Xaxpy' kernels. // // ================================================================================================= @@ -16,26 +16,21 @@ namespace clblast { const Database::DatabaseEntry Database::XaxpySingle = { "Xaxpy", Precision::kSingle, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"WGS",128}, {"WPT",1}, {"VW",2} } }, - { "Tesla K20m", { {"WGS",128}, {"WPT",2}, {"VW",2} } }, - { "Tesla K40m", { {"WGS",128}, {"WPT",1}, {"VW",4} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"WGS",64}, {"WPT",1}, {"VW",2} } }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Iris", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, + { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, } }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { - { "Iris", { {"WGS",512}, {"WPT",1}, {"VW",1} } }, + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"VW",4}, {"WGS",128}, {"WPT",1} } }, + { "default", { {"VW",4}, {"WGS",128}, {"WPT",1} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"WGS",128}, {"WPT",1}, {"VW",1} } }, + kDeviceTypeAll, "default", { + { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, } }, } @@ -43,55 +38,41 @@ const Database::DatabaseEntry Database::XaxpySingle = { // ================================================================================================= -const Database::DatabaseEntry Database::XaxpyDouble = { - "Xaxpy", Precision::kDouble, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"WGS",128}, {"WPT",1}, {"VW",1} } }, - { "Tesla K20m", { {"WGS",512}, {"WPT",1}, {"VW",2} } }, - { "Tesla K40m", { {"WGS",64}, {"WPT",1}, {"VW",2} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"WGS",256}, {"WPT",1}, {"VW",1} } }, +const Database::DatabaseEntry Database::XaxpyComplexSingle = { + "Xaxpy", Precision::kComplexSingle, { + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Iris", { {"VW",2}, {"WGS",128}, {"WPT",1} } }, + { "default", { {"VW",2}, {"WGS",128}, {"WPT",1} } }, } }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, + { "default", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"WGS",128}, {"WPT",1}, {"VW",1} } }, + kDeviceTypeAll, "default", { + { "default", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, } }, } }; + // ================================================================================================= -const Database::DatabaseEntry Database::XaxpyComplexSingle = { - "Xaxpy", Precision::kComplexSingle, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"WGS",256}, {"WPT",1}, {"VW",1} } }, - { "Tesla K20m", { {"WGS",128}, {"WPT",1}, {"VW",1} } }, - { "Tesla K40m", { {"WGS",128}, {"WPT",2}, {"VW",1} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"WGS",64}, {"WPT",1}, {"VW",1} } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { - { "Iris", { {"WGS",256}, {"WPT",1}, {"VW",1} } }, +const Database::DatabaseEntry Database::XaxpyDouble = { + "Xaxpy", Precision::kDouble, { + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"VW",2}, {"WGS",128}, {"WPT",1} } }, + { "default", { {"VW",2}, {"WGS",128}, {"WPT",1} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"WGS",128}, {"WPT",1}, {"VW",1} } }, + kDeviceTypeAll, "default", { + { "default", { {"VW",2}, {"WGS",128}, {"WPT",1} } }, } }, } @@ -101,25 +82,15 @@ const Database::DatabaseEntry Database::XaxpyComplexSingle = { const Database::DatabaseEntry Database::XaxpyComplexDouble = { "Xaxpy", Precision::kComplexDouble, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"WGS",128}, {"WPT",2}, {"VW",1} } }, - { "Tesla K20m", { {"WGS",256}, {"WPT",1}, {"VW",1} } }, - { "Tesla K40m", { {"WGS",64}, {"WPT",2}, {"VW",1} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"WGS",64}, {"WPT",1}, {"VW",1} } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, + { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"WGS",128}, {"WPT",1}, {"VW",1} } }, + kDeviceTypeAll, "default", { + { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, } }, } diff --git a/include/internal/database/xdot.h b/include/internal/database/xdot.h index 05841eb7..66a5231e 100644 --- a/include/internal/database/xdot.h +++ b/include/internal/database/xdot.h @@ -5,9 +5,9 @@ // width of 100 characters per line. // // Author(s): -// Cedric Nugteren <www.cedricnugteren.nl> +// Database generator <database.py> // -// This file populates the database with best-found tuning parameters for the Xdot kernels. +// This file populates the database with best-found tuning parameters for the 'Xdot' kernels. // // ================================================================================================= @@ -16,22 +16,21 @@ namespace clblast { const Database::DatabaseEntry Database::XdotSingle = { "Xdot", Precision::kSingle, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Iris", { {"VW",1}, {"WGS1",512}, {"WGS2",32} } }, + { "default", { {"VW",1}, {"WGS1",512}, {"WGS2",32} } }, } }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { - { "Iris", { {"WGS1",512}, {"WGS2",512} } }, + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"VW",1}, {"WGS1",128}, {"WGS2",1024} } }, + { "default", { {"VW",1}, {"WGS1",128}, {"WGS2",1024} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"WGS1",64}, {"WGS2",64} } }, + kDeviceTypeAll, "default", { + { "default", { {"VW",1}, {"WGS1",128}, {"WGS2",32} } }, } }, } @@ -39,47 +38,41 @@ const Database::DatabaseEntry Database::XdotSingle = { // ================================================================================================= -const Database::DatabaseEntry Database::XdotDouble = { - "Xdot", Precision::kDouble, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { +const Database::DatabaseEntry Database::XdotComplexSingle = { + "Xdot", Precision::kComplexSingle, { + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Iris", { {"VW",1}, {"WGS1",512}, {"WGS2",1024} } }, + { "default", { {"VW",1}, {"WGS1",512}, {"WGS2",1024} } }, } }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"VW",1}, {"WGS1",128}, {"WGS2",1024} } }, + { "default", { {"VW",1}, {"WGS1",128}, {"WGS2",1024} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"WGS1",64}, {"WGS2",64} } }, + kDeviceTypeAll, "default", { + { "default", { {"VW",1}, {"WGS1",128}, {"WGS2",1024} } }, } }, } }; + // ================================================================================================= -const Database::DatabaseEntry Database::XdotComplexSingle = { - "Xdot", Precision::kComplexSingle, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - } - }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { - { "Iris", { {"WGS1",512}, {"WGS2",512} } }, +const Database::DatabaseEntry Database::XdotDouble = { + "Xdot", Precision::kDouble, { + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"VW",1}, {"WGS1",256}, {"WGS2",1024} } }, + { "default", { {"VW",1}, {"WGS1",256}, {"WGS2",1024} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"WGS1",64}, {"WGS2",64} } }, + kDeviceTypeAll, "default", { + { "default", { {"VW",1}, {"WGS1",256}, {"WGS2",1024} } }, } }, } @@ -89,21 +82,15 @@ const Database::DatabaseEntry Database::XdotComplexSingle = { const Database::DatabaseEntry Database::XdotComplexDouble = { "Xdot", Precision::kComplexDouble, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - } - }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"VW",1}, {"WGS1",128}, {"WGS2",1024} } }, + { "default", { {"VW",1}, {"WGS1",128}, {"WGS2",1024} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"WGS1",64}, {"WGS2",64} } }, + kDeviceTypeAll, "default", { + { "default", { {"VW",1}, {"WGS1",128}, {"WGS2",1024} } }, } }, } diff --git a/include/internal/database/xgemm.h b/include/internal/database/xgemm.h index 49598c8c..9fbd8fbb 100644 --- a/include/internal/database/xgemm.h +++ b/include/internal/database/xgemm.h @@ -5,9 +5,9 @@ // width of 100 characters per line. // // Author(s): -// Cedric Nugteren <www.cedricnugteren.nl> +// Database generator <database.py> // -// This file populates the database with best-found tuning parameters for the Xgemm kernels. +// This file populates the database with best-found tuning parameters for the 'Xgemm' kernels. // // ================================================================================================= @@ -16,27 +16,21 @@ namespace clblast { const Database::DatabaseEntry Database::XgemmSingle = { "Xgemm", Precision::kSingle, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"MWG",128}, {"NWG",64}, {"KWG",32}, {"MDIMC",16}, {"NDIMC",16}, {"MDIMA",16}, {"NDIMB",16}, {"KWI",2}, {"VWM",2}, {"VWN",2}, {"STRM",1}, {"STRN",0}, {"SA",1}, {"SB",1} } }, - { "Tesla K20m", { {"MWG",128}, {"NWG",64}, {"KWG",16}, {"MDIMC",16}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",32}, {"KWI",2}, {"VWM",4}, {"VWN",1}, {"STRM",1}, {"STRN",0}, {"SA",1}, {"SB",1} } }, - { "Tesla K40m", { {"MWG",128}, {"NWG",128}, {"KWG",16}, {"MDIMC",16}, {"NDIMC",16}, {"MDIMA",32}, {"NDIMB",16}, {"KWI",8}, {"VWM",2}, {"VWN",1}, {"STRM",1}, {"STRN",0}, {"SA",1}, {"SB",1} } }, - { kDefaultDevice, { {"MWG",128}, {"NWG",64}, {"KWG",16}, {"MDIMC",16}, {"NDIMC",16}, {"MDIMA",16}, {"NDIMB",16}, {"KWI",2}, {"VWM",2}, {"VWN",1}, {"STRM",1}, {"STRN",0}, {"SA",1}, {"SB",1} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"MWG",128}, {"NWG",128}, {"KWG",32}, {"MDIMC",16}, {"NDIMC",16}, {"MDIMA",32}, {"NDIMB",8}, {"KWI",2}, {"VWM",4}, {"VWN",4}, {"STRM",1}, {"STRN",1}, {"SA",1}, {"SB",1} } }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Iris", { {"KWG",16}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",128}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",4}, {"VWN",1} } }, + { "default", { {"KWG",16}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",128}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",4}, {"VWN",1} } }, } }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { - { "Iris", { {"MWG",64}, {"NWG",64}, {"KWG",32}, {"MDIMC",8}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",8}, {"KWI",8}, {"VWM",4}, {"VWN",4}, {"STRM",1}, {"STRN",0}, {"SA",1}, {"SB",0} } }, + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"KWG",16}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",128}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",2}, {"VWN",4} } }, + { "default", { {"KWG",16}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",128}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",2}, {"VWN",4} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"MWG",32}, {"NWG",32}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",16}, {"KWI",1}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",0}, {"SB",0} } }, + kDeviceTypeAll, "default", { + { "default", { {"KWG",16}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",2}, {"VWN",1} } }, } }, } @@ -44,28 +38,23 @@ const Database::DatabaseEntry Database::XgemmSingle = { // ================================================================================================= -const Database::DatabaseEntry Database::XgemmDouble = { - "Xgemm", Precision::kDouble, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"MWG",32}, {"NWG",64}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",16}, {"MDIMA",16}, {"NDIMB",32}, {"KWI",2}, {"VWM",1}, {"VWN",2}, {"STRM",1}, {"STRN",0}, {"SA",1}, {"SB",1} } }, - { "Tesla K20m", { {"MWG",64}, {"NWG",128}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",32}, {"MDIMA",32}, {"NDIMB",32}, {"KWI",8}, {"VWM",2}, {"VWN",4}, {"STRM",1}, {"STRN",1}, {"SA",1}, {"SB",1} } }, - { "Tesla K40m", { {"MWG",64}, {"NWG",64}, {"KWG",16}, {"MDIMC",16}, {"NDIMC",16}, {"MDIMA",16}, {"NDIMB",32}, {"KWI",2}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",0}, {"SA",1}, {"SB",1} } }, - { kDefaultDevice, { {"MWG",32}, {"NWG",64}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",16}, {"MDIMA",16}, {"NDIMB",32}, {"KWI",2}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",1}, {"SB",1} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"MWG",128}, {"NWG",64}, {"KWG",16}, {"MDIMC",32}, {"NDIMC",8}, {"MDIMA",32}, {"NDIMB",16}, {"KWI",8}, {"VWM",1}, {"VWN",2}, {"STRM",1}, {"STRN",0}, {"SA",0}, {"SB",0} } }, +const Database::DatabaseEntry Database::XgemmComplexSingle = { + "Xgemm", Precision::kComplexSingle, { + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Iris", { {"KWG",32}, {"KWI",8}, {"MDIMA",32}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, + { "default", { {"KWG",32}, {"KWI",8}, {"MDIMA",32}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, } }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"KWG",16}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",32}, {"MWG",32}, {"NDIMB",32}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",1}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, + { "default", { {"KWG",16}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",32}, {"MWG",32}, {"NDIMB",32}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",1}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"MWG",32}, {"NWG",32}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",16}, {"KWI",1}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",0}, {"SB",0} } }, + kDeviceTypeAll, "default", { + { "default", { {"KWG",16}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",16}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, } }, } @@ -73,29 +62,17 @@ const Database::DatabaseEntry Database::XgemmDouble = { // ================================================================================================= -const Database::DatabaseEntry Database::XgemmComplexSingle = { - "Xgemm", Precision::kComplexSingle, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"MWG",32}, {"NWG",64}, {"KWG",16}, {"MDIMC",16}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",16}, {"KWI",2}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",1}, {"SB",1} } }, - { "Tesla K20m", { {"MWG",32}, {"NWG",64}, {"KWG",16}, {"MDIMC",16}, {"NDIMC",8}, {"MDIMA",8}, {"NDIMB",8}, {"KWI",8}, {"VWM",2}, {"VWN",2}, {"STRM",1}, {"STRN",0}, {"SA",1}, {"SB",0} } }, - { "Tesla K40m", { {"MWG",32}, {"NWG",64}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",32}, {"MDIMA",32}, {"NDIMB",16}, {"KWI",8}, {"VWM",1}, {"VWN",1}, {"STRM",0}, {"STRN",1}, {"SA",1}, {"SB",1} } }, - { kDefaultDevice, { {"MWG",32}, {"NWG",64}, {"KWG",16}, {"MDIMC",16}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",16}, {"KWI",2}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",1}, {"SB",1} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"MWG",16}, {"NWG",64}, {"KWG",32}, {"MDIMC",8}, {"NDIMC",8}, {"MDIMA",8}, {"NDIMB",16}, {"KWI",2}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",1}, {"SB",0} } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { - { "Iris", { {"MWG",32}, {"NWG",32}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",16}, {"KWI",1}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",0}, {"SB",0} } }, +const Database::DatabaseEntry Database::XgemmDouble = { + "Xgemm", Precision::kDouble, { + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",16}, {"NDIMC",32}, {"NWG",128}, {"SA",1}, {"SB",0}, {"STRM",1}, {"STRN",1}, {"VWM",2}, {"VWN",4} } }, + { "default", { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",16}, {"NDIMC",32}, {"NWG",128}, {"SA",1}, {"SB",0}, {"STRM",1}, {"STRN",1}, {"VWM",2}, {"VWN",4} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"MWG",32}, {"NWG",32}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",16}, {"KWI",1}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",0}, {"SB",0} } }, + kDeviceTypeAll, "default", { + { "default", { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",16}, {"NDIMC",32}, {"NWG",128}, {"SA",1}, {"SB",0}, {"STRM",1}, {"STRN",1}, {"VWM",2}, {"VWN",4} } }, } }, } @@ -105,29 +82,19 @@ const Database::DatabaseEntry Database::XgemmComplexSingle = { const Database::DatabaseEntry Database::XgemmComplexDouble = { "Xgemm", Precision::kComplexDouble, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"MWG",16}, {"NWG",32}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",8}, {"KWI",2}, {"VWM",1}, {"VWN",4}, {"STRM",1}, {"STRN",0}, {"SA",0}, {"SB",0} } }, - { "Tesla K20m", { {"MWG",16}, {"NWG",128}, {"KWG",32}, {"MDIMC",8}, {"NDIMC",32}, {"MDIMA",8}, {"NDIMB",32}, {"KWI",2}, {"VWM",1}, {"VWN",4}, {"STRM",1}, {"STRN",1}, {"SA",1}, {"SB",0} } }, - { "Tesla K40m", { {"MWG",32}, {"NWG",32}, {"KWG",16}, {"MDIMC",32}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",32}, {"KWI",8}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",0}, {"SB",1} } }, - { kDefaultDevice, { {"MWG",16}, {"NWG",32}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",8}, {"KWI",2}, {"VWM",1}, {"VWN",4}, {"STRM",1}, {"STRN",0}, {"SA",0}, {"SB",0} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"MWG",128}, {"NWG",32}, {"KWG",16}, {"MDIMC",32}, {"NDIMC",8}, {"MDIMA",32}, {"NDIMB",16}, {"KWI",8}, {"VWM",2}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",0}, {"SB",0} } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"KWG",16}, {"KWI",8}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, + { "default", { {"KWG",16}, {"KWI",8}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"MWG",32}, {"NWG",32}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",16}, {"KWI",1}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",0}, {"SB",0} } }, + kDeviceTypeAll, "default", { + { "default", { {"KWG",16}, {"KWI",8}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, } }, } }; + // ================================================================================================= } // namespace clblast diff --git a/include/internal/database/xgemv.h b/include/internal/database/xgemv.h index c315500f..ae9fbf30 100644 --- a/include/internal/database/xgemv.h +++ b/include/internal/database/xgemv.h @@ -5,9 +5,9 @@ // width of 100 characters per line. // // Author(s): -// Cedric Nugteren <www.cedricnugteren.nl> +// Database generator <database.py> // -// This file populates the database with best-found tuning parameters for the Xgemv kernels. +// This file populates the database with best-found tuning parameters for the 'Xgemv' kernels. // // ================================================================================================= @@ -16,26 +16,21 @@ namespace clblast { const Database::DatabaseEntry Database::XgemvSingle = { "Xgemv", Precision::kSingle, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, - { "Tesla K20m", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, - { "Tesla K40m", { {"WGS1",256}, {"WPT1",1}, {"WGS2",256}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",4} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Iris", { {"WGS1",64}, {"WPT1",2}, {"VW2",1}, {"WGS2",128}, {"WPT2",2}, {"VW3",4}, {"WGS3",64}, {"WPT3",8} } }, + { "default", { {"WGS1",64}, {"WPT1",2}, {"VW2",1}, {"WGS2",128}, {"WPT2",2}, {"VW3",4}, {"WGS3",64}, {"WPT3",8} } }, } }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { - { "Iris", { {"WGS1",256}, {"WPT1",2}, {"WGS2",64}, {"WPT2",4}, {"VW2",4}, {"WGS3",256}, {"WPT3",2}, {"VW3",8} } }, + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",256}, {"WPT2",1}, {"VW3",1}, {"WGS3",128}, {"WPT3",1} } }, + { "default", { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",256}, {"WPT2",1}, {"VW3",1}, {"WGS3",128}, {"WPT3",1} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, + kDeviceTypeAll, "default", { + { "default", { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",128}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, } }, } @@ -43,55 +38,35 @@ const Database::DatabaseEntry Database::XgemvSingle = { // ================================================================================================= -const Database::DatabaseEntry Database::XgemvDouble = { - "Xgemv", Precision::kDouble, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, - { "Tesla K20m", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, - { "Tesla K40m", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, - } - }, +const Database::DatabaseEntry Database::XgemvComplexSingle = { + "Xgemv", Precision::kComplexSingle, { { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { + kDeviceTypeGPU, "Intel", { + { "Iris", { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, + { "default", { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, + kDeviceTypeAll, "default", { + { "default", { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, } }, } }; + // ================================================================================================= -const Database::DatabaseEntry Database::XgemvComplexSingle = { - "Xgemv", Precision::kComplexSingle, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, - { "Tesla K20m", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, - { "Tesla K40m", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { - { "Iris", { {"WGS1",256}, {"WPT1",1}, {"WGS2",64}, {"WPT2",4}, {"VW2",2}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, +const Database::DatabaseEntry Database::XgemvDouble = { + "Xgemv", Precision::kDouble, { + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",256}, {"WPT2",1}, {"VW3",1}, {"WGS3",256}, {"WPT3",1} } }, + { "default", { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",256}, {"WPT2",1}, {"VW3",1}, {"WGS3",256}, {"WPT3",1} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, + kDeviceTypeAll, "default", { + { "default", { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",256}, {"WPT2",1}, {"VW3",1}, {"WGS3",256}, {"WPT3",1} } }, } }, } @@ -101,25 +76,9 @@ const Database::DatabaseEntry Database::XgemvComplexSingle = { const Database::DatabaseEntry Database::XgemvComplexDouble = { "Xgemv", Precision::kComplexDouble, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, - { "Tesla K20m", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, - { "Tesla K40m", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { - } - }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, + kDeviceTypeAll, "default", { + { "default", { {"WGS1",128}, {"WPT1",1}, {"VW2",1}, {"WGS2",128}, {"WPT2",1}, {"VW3",1}, {"WGS3",128}, {"WPT3",1} } }, } }, } diff --git a/scripts/database/database.db b/scripts/database/database.db Binary files differindex 7c8ec445..bf793177 100644 --- a/scripts/database/database.db +++ b/scripts/database/database.db diff --git a/scripts/database/database.py b/scripts/database/database.py index cda55fcc..01662a4b 100644 --- a/scripts/database/database.py +++ b/scripts/database/database.py @@ -20,6 +20,11 @@ import json import pandas as pd # Constants +VENDOR_DEFAULT = "default" +DEVICETYPE_DEFAULT = "All" +DEVICENAME_DEFAULT = "default" + +# Attributes DEVICETYPE_ATTRIBUTES = ["device_vendor", "device_type"] DEVICE_ATTRIBUTES = ["device", "device_core_clock", "device_compute_units"] KERNEL_ATTRIBUTES = ["precision", "kernel_family", @@ -67,13 +72,17 @@ def RemoveDuplicates(df): def RemoveEntriesByDevice(df, devicename): return df[df["device"] != devicename] +def GetEntriesByField(df, field, value): + return df[df[field] == value] + # Retrieves the results with the lowest execution times def GetBestResults(df): dfbest = pd.DataFrame() grouped = df.groupby(ATTRIBUTES+["kernel"]) for name, dfgroup in grouped: - bestcase = dfgroup.loc[[dfgroup["time"].idxmin()]] - dfbest = ConcatenateData(dfbest, bestcase) + besttime = dfgroup["time"].min() + bestcase = dfgroup[dfgroup["time"] == besttime].iloc[0] + dfbest = dfbest.append(bestcase, ignore_index=True) return dfbest # Sets defaults for devices of the same type/vendor based on the smallest values of all know @@ -81,14 +90,30 @@ def GetBestResults(df): # on other devices. def CalculateDefaults(df): dfdefault = pd.DataFrame() - grouped = df.groupby(DEVICETYPE_ATTRIBUTES + KERNEL_ATTRIBUTES) - for name, dfgroup in grouped: + + # Defaults per type/vendor + groups = df.groupby(DEVICETYPE_ATTRIBUTES+KERNEL_ATTRIBUTES+["kernel"]) + for name, dfgroup in groups: + default_values = dfgroup.min(axis=0) + default_values["device"] = DEVICENAME_DEFAULT + default_values["device_compute_units"] = 0 + default_values["device_core_clock"] = 0 + default_values["time"] = 0.0 + dfdefault = dfdefault.append(default_values, ignore_index=True) + + # Defaults in general + groups = df.groupby(KERNEL_ATTRIBUTES+["kernel"]) + for name, dfgroup in groups: default_values = dfgroup.min(axis=0) - default_values["device"] = "default" + default_values["device_vendor"] = VENDOR_DEFAULT + default_values["device_type"] = DEVICETYPE_DEFAULT + default_values["device"] = DEVICENAME_DEFAULT default_values["device_compute_units"] = 0 default_values["device_core_clock"] = 0 default_values["time"] = 0.0 dfdefault = dfdefault.append(default_values, ignore_index=True) + + # Database with both types of defaults only return dfdefault # ================================================================================================== @@ -132,8 +157,9 @@ def GetPrecision(family, precision): # The C++ device type and vendor def GetDeviceVendor(vendor, devtype): - return(" { // %s %ss\n kDeviceType%s, \"%s\", {\n" - % (vendor, devtype, devtype, vendor)) + if vendor == VENDOR_DEFAULT and devtype == DEVICETYPE_DEFAULT: + return(" { // Default\n kDeviceType%s, \"%s\", {\n" % (devtype, vendor)) + return(" { // %s %ss\n kDeviceType%s, \"%s\", {\n" % (vendor, devtype, devtype, vendor)) # Prints the data to a C++ database def PrintData(df, outputdir): @@ -226,6 +252,6 @@ bests = ConcatenateData(bests, defaults) # Outputs the data as a C++ database path_cpp_database = os.path.join(path_clblast, "include", "internal", "database") print "## Producing a C++ database in '"+path_cpp_database+"'" -PrintData(bests, ".") +PrintData(bests, path_cpp_database) # ================================================================================================== diff --git a/src/database.cc b/src/database.cc index b7275dad..7f5ac6eb 100644 --- a/src/database.cc +++ b/src/database.cc @@ -34,8 +34,8 @@ const std::vector<Database::DatabaseEntry> Database::database = { XgemmSingle, XgemmDouble, XgemmComplexSingle, XgemmComplexDouble, CopySingle, CopyDouble, CopyComplexSingle, CopyComplexDouble, PadSingle, PadDouble, PadComplexSingle, PadComplexDouble, - TraSingle, TraDouble, TraComplexSingle, TraComplexDouble, - PadTraSingle, PadTraDouble, PadTraComplexSingle, PadTraComplexDouble + TransposeSingle, TransposeDouble, TransposeComplexSingle, TransposeComplexDouble, + PadtransposeSingle, PadtransposeDouble, PadtransposeComplexSingle, PadtransposeComplexDouble }; // ================================================================================================= @@ -78,6 +78,7 @@ Database::Parameters Database::Search(const std::string &this_kernel, const std::string &this_device, const Precision this_precision) const { for (auto &db: database) { + if (db.kernel == this_kernel && db.precision == this_precision) { // Searches for the right vendor and device type, or selects the default if unavailable. This @@ -89,7 +90,8 @@ Database::Parameters Database::Search(const std::string &this_kernel, // Searches for the right device. If the current device is unavailable, selects the vendor // default parameters. This assumes the default is last in the database. for (auto &device: vendor.devices) { - if (device.name == this_device || device.name == kDefaultDevice) { + + if (device.name == this_device || device.name == "default") { // Sets the parameters accordingly return device.parameters; diff --git a/src/routines/level3/xgemm.cc b/src/routines/level3/xgemm.cc index 94aadcad..3961a3fd 100644 --- a/src/routines/level3/xgemm.cc +++ b/src/routines/level3/xgemm.cc @@ -30,7 +30,7 @@ template <> const Precision Xgemm<double2>::precision_ = Precision::kComplexDoub // Constructor: forwards to base class constructor template <typename T> Xgemm<T>::Xgemm(Queue &queue, Event &event, const std::string &name): - Routine<T>(queue, event, name, {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) { + Routine<T>(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, precision_) { source_string_ = #include "../../kernels/level3/copy.opencl" #include "../../kernels/level3/pad.opencl" diff --git a/src/routines/level3/xher2k.cc b/src/routines/level3/xher2k.cc index 5b84decb..e9970fd1 100644 --- a/src/routines/level3/xher2k.cc +++ b/src/routines/level3/xher2k.cc @@ -28,7 +28,7 @@ template <> const Precision Xher2k<double2,double>::precision_ = Precision::kCom // Constructor: forwards to base class constructor template <typename T, typename U> Xher2k<T,U>::Xher2k(Queue &queue, Event &event, const std::string &name): - Routine<T>(queue, event, name, {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) { + Routine<T>(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, precision_) { source_string_ = #include "../../kernels/level3/copy.opencl" #include "../../kernels/level3/pad.opencl" diff --git a/src/routines/level3/xherk.cc b/src/routines/level3/xherk.cc index 6a915c0b..49fd12af 100644 --- a/src/routines/level3/xherk.cc +++ b/src/routines/level3/xherk.cc @@ -28,7 +28,7 @@ template <> const Precision Xherk<double2,double>::precision_ = Precision::kComp // Constructor: forwards to base class constructor template <typename T, typename U> Xherk<T,U>::Xherk(Queue &queue, Event &event, const std::string &name): - Routine<T>(queue, event, name, {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) { + Routine<T>(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, precision_) { source_string_ = #include "../../kernels/level3/copy.opencl" #include "../../kernels/level3/pad.opencl" diff --git a/src/routines/level3/xsyr2k.cc b/src/routines/level3/xsyr2k.cc index de5f1afc..966a000f 100644 --- a/src/routines/level3/xsyr2k.cc +++ b/src/routines/level3/xsyr2k.cc @@ -30,7 +30,7 @@ template <> const Precision Xsyr2k<double2>::precision_ = Precision::kComplexDou // Constructor: forwards to base class constructor template <typename T> Xsyr2k<T>::Xsyr2k(Queue &queue, Event &event, const std::string &name): - Routine<T>(queue, event, name, {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) { + Routine<T>(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, precision_) { source_string_ = #include "../../kernels/level3/copy.opencl" #include "../../kernels/level3/pad.opencl" diff --git a/src/routines/level3/xsyrk.cc b/src/routines/level3/xsyrk.cc index d8fc6357..630cb731 100644 --- a/src/routines/level3/xsyrk.cc +++ b/src/routines/level3/xsyrk.cc @@ -30,7 +30,7 @@ template <> const Precision Xsyrk<double2>::precision_ = Precision::kComplexDoub // Constructor: forwards to base class constructor template <typename T> Xsyrk<T>::Xsyrk(Queue &queue, Event &event, const std::string &name): - Routine<T>(queue, event, name, {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) { + Routine<T>(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, precision_) { source_string_ = #include "../../kernels/level3/copy.opencl" #include "../../kernels/level3/pad.opencl" |