diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2016-01-30 11:43:21 +0100 |
---|---|---|
committer | Cedric Nugteren <web@cedricnugteren.nl> | 2016-01-30 11:43:21 +0100 |
commit | 276e772a2c672ad868ba57e06d55e4991c793207 (patch) | |
tree | 015c32f78022e65403dc207c80d2b52458363f68 /include | |
parent | 76c91480303dd398b4ff5953a833e493b1409630 (diff) |
Added first auto-generated database headers from the Python database; only K40 and Iris supported now
Diffstat (limited to 'include')
-rw-r--r-- | include/internal/database.h | 11 | ||||
-rw-r--r-- | include/internal/database/copy.h | 106 | ||||
-rw-r--r-- | include/internal/database/pad.h | 106 | ||||
-rw-r--r-- | include/internal/database/padtranspose.h | 114 | ||||
-rw-r--r-- | include/internal/database/transpose.h | 110 | ||||
-rw-r--r-- | include/internal/database/xaxpy.h | 107 | ||||
-rw-r--r-- | include/internal/database/xdot.h | 91 | ||||
-rw-r--r-- | include/internal/database/xgemm.h | 111 | ||||
-rw-r--r-- | include/internal/database/xgemv.h | 101 |
9 files changed, 307 insertions, 550 deletions
diff --git a/include/internal/database.h b/include/internal/database.h index 1ac0e646..9107f978 100644 --- a/include/internal/database.h +++ b/include/internal/database.h @@ -56,15 +56,8 @@ class Database { static constexpr auto kDeviceTypeAll = "default"; // The OpenCL device vendors - static constexpr auto kDeviceVendorNVIDIA = "NVIDIA Corporation"; - static constexpr auto kDeviceVendorAMD = "Advanced Micro Devices, Inc."; - static constexpr auto kDeviceVendorIntel = "Intel"; static constexpr auto kDeviceVendorAll = "default"; - // The OpenCL device names - static constexpr auto kDefaultDevice = "default"; - - // The database consists of separate database entries, stored together in a vector static const DatabaseEntry XaxpySingle, XaxpyDouble, XaxpyComplexSingle, XaxpyComplexDouble; static const DatabaseEntry XdotSingle, XdotDouble, XdotComplexSingle, XdotComplexDouble; @@ -72,8 +65,8 @@ class Database { static const DatabaseEntry XgemmSingle, XgemmDouble, XgemmComplexSingle, XgemmComplexDouble; static const DatabaseEntry CopySingle, CopyDouble, CopyComplexSingle, CopyComplexDouble; static const DatabaseEntry PadSingle, PadDouble, PadComplexSingle, PadComplexDouble; - static const DatabaseEntry TraSingle, TraDouble, TraComplexSingle, TraComplexDouble; - static const DatabaseEntry PadTraSingle, PadTraDouble, PadTraComplexSingle, PadTraComplexDouble; + static const DatabaseEntry TransposeSingle, TransposeDouble, TransposeComplexSingle, TransposeComplexDouble; + static const DatabaseEntry PadtransposeSingle, PadtransposeDouble, PadtransposeComplexSingle, PadtransposeComplexDouble; static const std::vector<DatabaseEntry> database; // The constructor diff --git a/include/internal/database/copy.h b/include/internal/database/copy.h index 541a352b..3bd85fa6 100644 --- a/include/internal/database/copy.h +++ b/include/internal/database/copy.h @@ -5,9 +5,9 @@ // width of 100 characters per line. // // Author(s): -// Cedric Nugteren <www.cedricnugteren.nl> +// Database generator <database.py> // -// This file populates the database with best-found tuning parameters for the Copy kernels. +// This file populates the database with best-found tuning parameters for the 'Copy' kernels. // // ================================================================================================= @@ -16,26 +16,21 @@ namespace clblast { const Database::DatabaseEntry Database::CopySingle = { "Copy", Precision::kSingle, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",2} } }, - { "Tesla K20m", { {"COPY_DIMX",8}, {"COPY_DIMY",16}, {"COPY_WPT",2}, {"COPY_VW",4} } }, - { "Tesla K40m", { {"COPY_DIMX",16}, {"COPY_DIMY",16}, {"COPY_WPT",4}, {"COPY_VW",4} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_WPT",4}, {"COPY_VW",2} } }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Iris", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } }, + { "default", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } }, } }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { - { "Iris", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",4} } }, + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",2} } }, + { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",2} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",1} } }, + kDeviceTypeAll, "default", { + { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } }, } }, } @@ -43,27 +38,23 @@ const Database::DatabaseEntry Database::CopySingle = { // ================================================================================================= -const Database::DatabaseEntry Database::CopyDouble = { - "Copy", Precision::kDouble, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",1} } }, - { "Tesla K20m", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",2} } }, - { "Tesla K40m", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",2} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_WPT",2}, {"COPY_VW",4} } }, +const Database::DatabaseEntry Database::CopyComplexSingle = { + "Copy", Precision::kComplexSingle, { + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Iris", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } }, + { "default", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } }, } }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, + { "default", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",1} } }, + kDeviceTypeAll, "default", { + { "default", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, } }, } @@ -71,28 +62,17 @@ const Database::DatabaseEntry Database::CopyDouble = { // ================================================================================================= -const Database::DatabaseEntry Database::CopyComplexSingle = { - "Copy", Precision::kComplexSingle, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"COPY_DIMX",16}, {"COPY_DIMY",16}, {"COPY_WPT",1}, {"COPY_VW",1} } }, - { "Tesla K20m", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_WPT",2}, {"COPY_VW",1} } }, - { "Tesla K40m", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",1} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",1} } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { - { "Iris", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",1} } }, +const Database::DatabaseEntry Database::CopyDouble = { + "Copy", Precision::kDouble, { + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",2} } }, + { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",2} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",1} } }, + kDeviceTypeAll, "default", { + { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",2} } }, } }, } @@ -102,25 +82,15 @@ const Database::DatabaseEntry Database::CopyComplexSingle = { const Database::DatabaseEntry Database::CopyComplexDouble = { "Copy", Precision::kComplexDouble, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",1} } }, - { "Tesla K20m", { {"COPY_DIMX",8}, {"COPY_DIMY",32}, {"COPY_WPT",1}, {"COPY_VW",1} } }, - { "Tesla K40m", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",1} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"COPY_DIMX",8}, {"COPY_DIMY",32}, {"COPY_WPT",4}, {"COPY_VW",2} } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, + { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",1} } }, + kDeviceTypeAll, "default", { + { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, } }, } diff --git a/include/internal/database/pad.h b/include/internal/database/pad.h index 4a599648..d833a934 100644 --- a/include/internal/database/pad.h +++ b/include/internal/database/pad.h @@ -5,9 +5,9 @@ // width of 100 characters per line. // // Author(s): -// Cedric Nugteren <www.cedricnugteren.nl> +// Database generator <database.py> // -// This file populates the database with best-found tuning parameters for the Pad kernels. +// This file populates the database with best-found tuning parameters for the 'Pad' kernels. // // ================================================================================================= @@ -16,26 +16,21 @@ namespace clblast { const Database::DatabaseEntry Database::PadSingle = { "Pad", Precision::kSingle, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",4} } }, - { "Tesla K20m", { {"PAD_DIMX",16}, {"PAD_DIMY",32}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, - { "Tesla K40m", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Iris", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, } }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { - { "Iris", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + kDeviceTypeAll, "default", { + { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, } }, } @@ -43,27 +38,23 @@ const Database::DatabaseEntry Database::PadSingle = { // ================================================================================================= -const Database::DatabaseEntry Database::PadDouble = { - "Pad", Precision::kDouble, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"PAD_DIMX",16}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, - { "Tesla K20m", { {"PAD_DIMX",16}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, - { "Tesla K40m", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, +const Database::DatabaseEntry Database::PadComplexSingle = { + "Pad", Precision::kComplexSingle, { + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Iris", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",2}, {"PAD_WPTY",4} } }, + { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",2}, {"PAD_WPTY",4} } }, } }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + kDeviceTypeAll, "default", { + { "default", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, } }, } @@ -71,28 +62,17 @@ const Database::DatabaseEntry Database::PadDouble = { // ================================================================================================= -const Database::DatabaseEntry Database::PadComplexSingle = { - "Pad", Precision::kComplexSingle, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"PAD_DIMX",16}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, - { "Tesla K20m", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, - { "Tesla K40m", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { - { "Iris", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, +const Database::DatabaseEntry Database::PadDouble = { + "Pad", Precision::kDouble, { + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, + { "default", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + kDeviceTypeAll, "default", { + { "default", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, } }, } @@ -102,25 +82,15 @@ const Database::DatabaseEntry Database::PadComplexSingle = { const Database::DatabaseEntry Database::PadComplexDouble = { "Pad", Precision::kComplexDouble, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, - { "Tesla K20m", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, - { "Tesla K40m", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"PAD_DIMX",8}, {"PAD_DIMY",16}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + kDeviceTypeAll, "default", { + { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, } }, } diff --git a/include/internal/database/padtranspose.h b/include/internal/database/padtranspose.h index 53226c1d..dacc693f 100644 --- a/include/internal/database/padtranspose.h +++ b/include/internal/database/padtranspose.h @@ -5,37 +5,32 @@ // width of 100 characters per line. // // Author(s): -// Cedric Nugteren <www.cedricnugteren.nl> +// Database generator <database.py> // -// This file populates the database with best-found tuning parameters for the PadTranspose kernels. +// This file populates the database with best-found tuning parameters for the 'Padtranspose' kernels. // // ================================================================================================= namespace clblast { // ================================================================================================= -const Database::DatabaseEntry Database::PadTraSingle = { - "PadTranspose", Precision::kSingle, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"PADTRA_TILE",16}, {"PADTRA_WPT",2}, {"PADTRA_PAD",1} } }, - { "Tesla K20m", { {"PADTRA_TILE",16}, {"PADTRA_WPT",2}, {"PADTRA_PAD",1} } }, - { "Tesla K40m", { {"PADTRA_TILE",32}, {"PADTRA_WPT",2}, {"PADTRA_PAD",1} } }, +const Database::DatabaseEntry Database::PadtransposeSingle = { + "Padtranspose", Precision::kSingle, { + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Iris", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, + { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, } }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"PADTRA_TILE",16}, {"PADTRA_WPT",4}, {"PADTRA_PAD",0} } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { - { "Iris", { {"PADTRA_TILE",16}, {"PADTRA_WPT",2}, {"PADTRA_PAD",0} } }, + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",2} } }, + { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",2} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",0} } }, + kDeviceTypeAll, "default", { + { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, } }, } @@ -43,27 +38,23 @@ const Database::DatabaseEntry Database::PadTraSingle = { // ================================================================================================= -const Database::DatabaseEntry Database::PadTraDouble = { - "PadTranspose", Precision::kDouble, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",1} } }, - { "Tesla K20m", { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",1} } }, - { "Tesla K40m", { {"PADTRA_TILE",16}, {"PADTRA_WPT",2}, {"PADTRA_PAD",1} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"PADTRA_TILE",8}, {"PADTRA_WPT",4}, {"PADTRA_PAD",0} } }, +const Database::DatabaseEntry Database::PadtransposeComplexSingle = { + "Padtranspose", Precision::kComplexSingle, { + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Iris", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, + { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, } }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, + { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",0} } }, + kDeviceTypeAll, "default", { + { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, } }, } @@ -71,28 +62,17 @@ const Database::DatabaseEntry Database::PadTraDouble = { // ================================================================================================= -const Database::DatabaseEntry Database::PadTraComplexSingle = { - "PadTranspose", Precision::kComplexSingle, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",1} } }, - { "Tesla K20m", { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",1} } }, - { "Tesla K40m", { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",0} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"PADTRA_TILE",16}, {"PADTRA_WPT",2}, {"PADTRA_PAD",0} } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { - { "Iris", { {"PADTRA_TILE",16}, {"PADTRA_WPT",2}, {"PADTRA_PAD",0} } }, +const Database::DatabaseEntry Database::PadtransposeDouble = { + "Padtranspose", Precision::kDouble, { + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, + { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",0} } }, + kDeviceTypeAll, "default", { + { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, } }, } @@ -100,27 +80,17 @@ const Database::DatabaseEntry Database::PadTraComplexSingle = { // ================================================================================================= -const Database::DatabaseEntry Database::PadTraComplexDouble = { - "PadTranspose", Precision::kComplexDouble, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",1} } }, - { "Tesla K20m", { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",1} } }, - { "Tesla K40m", { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",1} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"PADTRA_TILE",8}, {"PADTRA_WPT",2}, {"PADTRA_PAD",1} } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { +const Database::DatabaseEntry Database::PadtransposeComplexDouble = { + "Padtranspose", Precision::kComplexDouble, { + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, + { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",0} } }, + kDeviceTypeAll, "default", { + { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, } }, } diff --git a/include/internal/database/transpose.h b/include/internal/database/transpose.h index 1d12a13e..46a38bc2 100644 --- a/include/internal/database/transpose.h +++ b/include/internal/database/transpose.h @@ -5,37 +5,32 @@ // width of 100 characters per line. // // Author(s): -// Cedric Nugteren <www.cedricnugteren.nl> +// Database generator <database.py> // -// This file populates the database with best-found tuning parameters for the Transpose kernels. +// This file populates the database with best-found tuning parameters for the 'Transpose' kernels. // // ================================================================================================= namespace clblast { // ================================================================================================= -const Database::DatabaseEntry Database::TraSingle = { +const Database::DatabaseEntry Database::TransposeSingle = { "Transpose", Precision::kSingle, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"TRA_DIM",16}, {"TRA_WPT",2}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0} } }, - { "Tesla K20m", { {"TRA_DIM",16}, {"TRA_WPT",2}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0} } }, - { "Tesla K40m", { {"TRA_DIM",16}, {"TRA_WPT",2}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"TRA_DIM",16}, {"TRA_WPT",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1} } }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Iris", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, + { "default", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, } }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { - { "Iris", { {"TRA_DIM",8}, {"TRA_WPT",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0} } }, + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, + { "default", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0} } }, + kDeviceTypeAll, "default", { + { "default", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, } }, } @@ -43,27 +38,23 @@ const Database::DatabaseEntry Database::TraSingle = { // ================================================================================================= -const Database::DatabaseEntry Database::TraDouble = { - "Transpose", Precision::kDouble, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"TRA_DIM",8}, {"TRA_WPT",2}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0} } }, - { "Tesla K20m", { {"TRA_DIM",16}, {"TRA_WPT",2}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0} } }, - { "Tesla K40m", { {"TRA_DIM",16}, {"TRA_WPT",2}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"TRA_DIM",16}, {"TRA_WPT",2}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1} } }, +const Database::DatabaseEntry Database::TransposeComplexSingle = { + "Transpose", Precision::kComplexSingle, { + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Iris", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } }, + { "default", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } }, } }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, + { "default", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0} } }, + kDeviceTypeAll, "default", { + { "default", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, } }, } @@ -71,28 +62,17 @@ const Database::DatabaseEntry Database::TraDouble = { // ================================================================================================= -const Database::DatabaseEntry Database::TraComplexSingle = { - "Transpose", Precision::kComplexSingle, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0} } }, - { "Tesla K20m", { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0} } }, - { "Tesla K40m", { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"TRA_DIM",16}, {"TRA_WPT",2}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1} } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { - { "Iris", { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0} } }, +const Database::DatabaseEntry Database::TransposeDouble = { + "Transpose", Precision::kDouble, { + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } }, + { "default", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0} } }, + kDeviceTypeAll, "default", { + { "default", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } }, } }, } @@ -100,27 +80,17 @@ const Database::DatabaseEntry Database::TraComplexSingle = { // ================================================================================================= -const Database::DatabaseEntry Database::TraComplexDouble = { +const Database::DatabaseEntry Database::TransposeComplexDouble = { "Transpose", Precision::kComplexDouble, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"TRA_DIM",8}, {"TRA_WPT",1}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0} } }, - { "Tesla K20m", { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0} } }, - { "Tesla K40m", { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1} } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, + { "default", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0} } }, + kDeviceTypeAll, "default", { + { "default", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, } }, } diff --git a/include/internal/database/xaxpy.h b/include/internal/database/xaxpy.h index 058e3c0a..783e142d 100644 --- a/include/internal/database/xaxpy.h +++ b/include/internal/database/xaxpy.h @@ -5,9 +5,9 @@ // width of 100 characters per line. // // Author(s): -// Cedric Nugteren <www.cedricnugteren.nl> +// Database generator <database.py> // -// This file populates the database with best-found tuning parameters for the Xaxpy kernels. +// This file populates the database with best-found tuning parameters for the 'Xaxpy' kernels. // // ================================================================================================= @@ -16,26 +16,21 @@ namespace clblast { const Database::DatabaseEntry Database::XaxpySingle = { "Xaxpy", Precision::kSingle, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"WGS",128}, {"WPT",1}, {"VW",2} } }, - { "Tesla K20m", { {"WGS",128}, {"WPT",2}, {"VW",2} } }, - { "Tesla K40m", { {"WGS",128}, {"WPT",1}, {"VW",4} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"WGS",64}, {"WPT",1}, {"VW",2} } }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Iris", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, + { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, } }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { - { "Iris", { {"WGS",512}, {"WPT",1}, {"VW",1} } }, + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"VW",4}, {"WGS",128}, {"WPT",1} } }, + { "default", { {"VW",4}, {"WGS",128}, {"WPT",1} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"WGS",128}, {"WPT",1}, {"VW",1} } }, + kDeviceTypeAll, "default", { + { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, } }, } @@ -43,55 +38,41 @@ const Database::DatabaseEntry Database::XaxpySingle = { // ================================================================================================= -const Database::DatabaseEntry Database::XaxpyDouble = { - "Xaxpy", Precision::kDouble, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"WGS",128}, {"WPT",1}, {"VW",1} } }, - { "Tesla K20m", { {"WGS",512}, {"WPT",1}, {"VW",2} } }, - { "Tesla K40m", { {"WGS",64}, {"WPT",1}, {"VW",2} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"WGS",256}, {"WPT",1}, {"VW",1} } }, +const Database::DatabaseEntry Database::XaxpyComplexSingle = { + "Xaxpy", Precision::kComplexSingle, { + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Iris", { {"VW",2}, {"WGS",128}, {"WPT",1} } }, + { "default", { {"VW",2}, {"WGS",128}, {"WPT",1} } }, } }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, + { "default", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"WGS",128}, {"WPT",1}, {"VW",1} } }, + kDeviceTypeAll, "default", { + { "default", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, } }, } }; + // ================================================================================================= -const Database::DatabaseEntry Database::XaxpyComplexSingle = { - "Xaxpy", Precision::kComplexSingle, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"WGS",256}, {"WPT",1}, {"VW",1} } }, - { "Tesla K20m", { {"WGS",128}, {"WPT",1}, {"VW",1} } }, - { "Tesla K40m", { {"WGS",128}, {"WPT",2}, {"VW",1} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"WGS",64}, {"WPT",1}, {"VW",1} } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { - { "Iris", { {"WGS",256}, {"WPT",1}, {"VW",1} } }, +const Database::DatabaseEntry Database::XaxpyDouble = { + "Xaxpy", Precision::kDouble, { + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"VW",2}, {"WGS",128}, {"WPT",1} } }, + { "default", { {"VW",2}, {"WGS",128}, {"WPT",1} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"WGS",128}, {"WPT",1}, {"VW",1} } }, + kDeviceTypeAll, "default", { + { "default", { {"VW",2}, {"WGS",128}, {"WPT",1} } }, } }, } @@ -101,25 +82,15 @@ const Database::DatabaseEntry Database::XaxpyComplexSingle = { const Database::DatabaseEntry Database::XaxpyComplexDouble = { "Xaxpy", Precision::kComplexDouble, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"WGS",128}, {"WPT",2}, {"VW",1} } }, - { "Tesla K20m", { {"WGS",256}, {"WPT",1}, {"VW",1} } }, - { "Tesla K40m", { {"WGS",64}, {"WPT",2}, {"VW",1} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"WGS",64}, {"WPT",1}, {"VW",1} } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, + { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"WGS",128}, {"WPT",1}, {"VW",1} } }, + kDeviceTypeAll, "default", { + { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, } }, } diff --git a/include/internal/database/xdot.h b/include/internal/database/xdot.h index 05841eb7..66a5231e 100644 --- a/include/internal/database/xdot.h +++ b/include/internal/database/xdot.h @@ -5,9 +5,9 @@ // width of 100 characters per line. // // Author(s): -// Cedric Nugteren <www.cedricnugteren.nl> +// Database generator <database.py> // -// This file populates the database with best-found tuning parameters for the Xdot kernels. +// This file populates the database with best-found tuning parameters for the 'Xdot' kernels. // // ================================================================================================= @@ -16,22 +16,21 @@ namespace clblast { const Database::DatabaseEntry Database::XdotSingle = { "Xdot", Precision::kSingle, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Iris", { {"VW",1}, {"WGS1",512}, {"WGS2",32} } }, + { "default", { {"VW",1}, {"WGS1",512}, {"WGS2",32} } }, } }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { - { "Iris", { {"WGS1",512}, {"WGS2",512} } }, + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"VW",1}, {"WGS1",128}, {"WGS2",1024} } }, + { "default", { {"VW",1}, {"WGS1",128}, {"WGS2",1024} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"WGS1",64}, {"WGS2",64} } }, + kDeviceTypeAll, "default", { + { "default", { {"VW",1}, {"WGS1",128}, {"WGS2",32} } }, } }, } @@ -39,47 +38,41 @@ const Database::DatabaseEntry Database::XdotSingle = { // ================================================================================================= -const Database::DatabaseEntry Database::XdotDouble = { - "Xdot", Precision::kDouble, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { +const Database::DatabaseEntry Database::XdotComplexSingle = { + "Xdot", Precision::kComplexSingle, { + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Iris", { {"VW",1}, {"WGS1",512}, {"WGS2",1024} } }, + { "default", { {"VW",1}, {"WGS1",512}, {"WGS2",1024} } }, } }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"VW",1}, {"WGS1",128}, {"WGS2",1024} } }, + { "default", { {"VW",1}, {"WGS1",128}, {"WGS2",1024} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"WGS1",64}, {"WGS2",64} } }, + kDeviceTypeAll, "default", { + { "default", { {"VW",1}, {"WGS1",128}, {"WGS2",1024} } }, } }, } }; + // ================================================================================================= -const Database::DatabaseEntry Database::XdotComplexSingle = { - "Xdot", Precision::kComplexSingle, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - } - }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { - { "Iris", { {"WGS1",512}, {"WGS2",512} } }, +const Database::DatabaseEntry Database::XdotDouble = { + "Xdot", Precision::kDouble, { + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"VW",1}, {"WGS1",256}, {"WGS2",1024} } }, + { "default", { {"VW",1}, {"WGS1",256}, {"WGS2",1024} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"WGS1",64}, {"WGS2",64} } }, + kDeviceTypeAll, "default", { + { "default", { {"VW",1}, {"WGS1",256}, {"WGS2",1024} } }, } }, } @@ -89,21 +82,15 @@ const Database::DatabaseEntry Database::XdotComplexSingle = { const Database::DatabaseEntry Database::XdotComplexDouble = { "Xdot", Precision::kComplexDouble, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - } - }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"VW",1}, {"WGS1",128}, {"WGS2",1024} } }, + { "default", { {"VW",1}, {"WGS1",128}, {"WGS2",1024} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"WGS1",64}, {"WGS2",64} } }, + kDeviceTypeAll, "default", { + { "default", { {"VW",1}, {"WGS1",128}, {"WGS2",1024} } }, } }, } diff --git a/include/internal/database/xgemm.h b/include/internal/database/xgemm.h index 49598c8c..9fbd8fbb 100644 --- a/include/internal/database/xgemm.h +++ b/include/internal/database/xgemm.h @@ -5,9 +5,9 @@ // width of 100 characters per line. // // Author(s): -// Cedric Nugteren <www.cedricnugteren.nl> +// Database generator <database.py> // -// This file populates the database with best-found tuning parameters for the Xgemm kernels. +// This file populates the database with best-found tuning parameters for the 'Xgemm' kernels. // // ================================================================================================= @@ -16,27 +16,21 @@ namespace clblast { const Database::DatabaseEntry Database::XgemmSingle = { "Xgemm", Precision::kSingle, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"MWG",128}, {"NWG",64}, {"KWG",32}, {"MDIMC",16}, {"NDIMC",16}, {"MDIMA",16}, {"NDIMB",16}, {"KWI",2}, {"VWM",2}, {"VWN",2}, {"STRM",1}, {"STRN",0}, {"SA",1}, {"SB",1} } }, - { "Tesla K20m", { {"MWG",128}, {"NWG",64}, {"KWG",16}, {"MDIMC",16}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",32}, {"KWI",2}, {"VWM",4}, {"VWN",1}, {"STRM",1}, {"STRN",0}, {"SA",1}, {"SB",1} } }, - { "Tesla K40m", { {"MWG",128}, {"NWG",128}, {"KWG",16}, {"MDIMC",16}, {"NDIMC",16}, {"MDIMA",32}, {"NDIMB",16}, {"KWI",8}, {"VWM",2}, {"VWN",1}, {"STRM",1}, {"STRN",0}, {"SA",1}, {"SB",1} } }, - { kDefaultDevice, { {"MWG",128}, {"NWG",64}, {"KWG",16}, {"MDIMC",16}, {"NDIMC",16}, {"MDIMA",16}, {"NDIMB",16}, {"KWI",2}, {"VWM",2}, {"VWN",1}, {"STRM",1}, {"STRN",0}, {"SA",1}, {"SB",1} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"MWG",128}, {"NWG",128}, {"KWG",32}, {"MDIMC",16}, {"NDIMC",16}, {"MDIMA",32}, {"NDIMB",8}, {"KWI",2}, {"VWM",4}, {"VWN",4}, {"STRM",1}, {"STRN",1}, {"SA",1}, {"SB",1} } }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Iris", { {"KWG",16}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",128}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",4}, {"VWN",1} } }, + { "default", { {"KWG",16}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",128}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",4}, {"VWN",1} } }, } }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { - { "Iris", { {"MWG",64}, {"NWG",64}, {"KWG",32}, {"MDIMC",8}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",8}, {"KWI",8}, {"VWM",4}, {"VWN",4}, {"STRM",1}, {"STRN",0}, {"SA",1}, {"SB",0} } }, + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"KWG",16}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",128}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",2}, {"VWN",4} } }, + { "default", { {"KWG",16}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",128}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",2}, {"VWN",4} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"MWG",32}, {"NWG",32}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",16}, {"KWI",1}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",0}, {"SB",0} } }, + kDeviceTypeAll, "default", { + { "default", { {"KWG",16}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",2}, {"VWN",1} } }, } }, } @@ -44,28 +38,23 @@ const Database::DatabaseEntry Database::XgemmSingle = { // ================================================================================================= -const Database::DatabaseEntry Database::XgemmDouble = { - "Xgemm", Precision::kDouble, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"MWG",32}, {"NWG",64}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",16}, {"MDIMA",16}, {"NDIMB",32}, {"KWI",2}, {"VWM",1}, {"VWN",2}, {"STRM",1}, {"STRN",0}, {"SA",1}, {"SB",1} } }, - { "Tesla K20m", { {"MWG",64}, {"NWG",128}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",32}, {"MDIMA",32}, {"NDIMB",32}, {"KWI",8}, {"VWM",2}, {"VWN",4}, {"STRM",1}, {"STRN",1}, {"SA",1}, {"SB",1} } }, - { "Tesla K40m", { {"MWG",64}, {"NWG",64}, {"KWG",16}, {"MDIMC",16}, {"NDIMC",16}, {"MDIMA",16}, {"NDIMB",32}, {"KWI",2}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",0}, {"SA",1}, {"SB",1} } }, - { kDefaultDevice, { {"MWG",32}, {"NWG",64}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",16}, {"MDIMA",16}, {"NDIMB",32}, {"KWI",2}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",1}, {"SB",1} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"MWG",128}, {"NWG",64}, {"KWG",16}, {"MDIMC",32}, {"NDIMC",8}, {"MDIMA",32}, {"NDIMB",16}, {"KWI",8}, {"VWM",1}, {"VWN",2}, {"STRM",1}, {"STRN",0}, {"SA",0}, {"SB",0} } }, +const Database::DatabaseEntry Database::XgemmComplexSingle = { + "Xgemm", Precision::kComplexSingle, { + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Iris", { {"KWG",32}, {"KWI",8}, {"MDIMA",32}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, + { "default", { {"KWG",32}, {"KWI",8}, {"MDIMA",32}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, } }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"KWG",16}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",32}, {"MWG",32}, {"NDIMB",32}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",1}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, + { "default", { {"KWG",16}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",32}, {"MWG",32}, {"NDIMB",32}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",1}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"MWG",32}, {"NWG",32}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",16}, {"KWI",1}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",0}, {"SB",0} } }, + kDeviceTypeAll, "default", { + { "default", { {"KWG",16}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",16}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, } }, } @@ -73,29 +62,17 @@ const Database::DatabaseEntry Database::XgemmDouble = { // ================================================================================================= -const Database::DatabaseEntry Database::XgemmComplexSingle = { - "Xgemm", Precision::kComplexSingle, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"MWG",32}, {"NWG",64}, {"KWG",16}, {"MDIMC",16}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",16}, {"KWI",2}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",1}, {"SB",1} } }, - { "Tesla K20m", { {"MWG",32}, {"NWG",64}, {"KWG",16}, {"MDIMC",16}, {"NDIMC",8}, {"MDIMA",8}, {"NDIMB",8}, {"KWI",8}, {"VWM",2}, {"VWN",2}, {"STRM",1}, {"STRN",0}, {"SA",1}, {"SB",0} } }, - { "Tesla K40m", { {"MWG",32}, {"NWG",64}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",32}, {"MDIMA",32}, {"NDIMB",16}, {"KWI",8}, {"VWM",1}, {"VWN",1}, {"STRM",0}, {"STRN",1}, {"SA",1}, {"SB",1} } }, - { kDefaultDevice, { {"MWG",32}, {"NWG",64}, {"KWG",16}, {"MDIMC",16}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",16}, {"KWI",2}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",1}, {"SB",1} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"MWG",16}, {"NWG",64}, {"KWG",32}, {"MDIMC",8}, {"NDIMC",8}, {"MDIMA",8}, {"NDIMB",16}, {"KWI",2}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",1}, {"SB",0} } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { - { "Iris", { {"MWG",32}, {"NWG",32}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",16}, {"KWI",1}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",0}, {"SB",0} } }, +const Database::DatabaseEntry Database::XgemmDouble = { + "Xgemm", Precision::kDouble, { + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",16}, {"NDIMC",32}, {"NWG",128}, {"SA",1}, {"SB",0}, {"STRM",1}, {"STRN",1}, {"VWM",2}, {"VWN",4} } }, + { "default", { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",16}, {"NDIMC",32}, {"NWG",128}, {"SA",1}, {"SB",0}, {"STRM",1}, {"STRN",1}, {"VWM",2}, {"VWN",4} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"MWG",32}, {"NWG",32}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",16}, {"KWI",1}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",0}, {"SB",0} } }, + kDeviceTypeAll, "default", { + { "default", { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",16}, {"NDIMC",32}, {"NWG",128}, {"SA",1}, {"SB",0}, {"STRM",1}, {"STRN",1}, {"VWM",2}, {"VWN",4} } }, } }, } @@ -105,29 +82,19 @@ const Database::DatabaseEntry Database::XgemmComplexSingle = { const Database::DatabaseEntry Database::XgemmComplexDouble = { "Xgemm", Precision::kComplexDouble, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"MWG",16}, {"NWG",32}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",8}, {"KWI",2}, {"VWM",1}, {"VWN",4}, {"STRM",1}, {"STRN",0}, {"SA",0}, {"SB",0} } }, - { "Tesla K20m", { {"MWG",16}, {"NWG",128}, {"KWG",32}, {"MDIMC",8}, {"NDIMC",32}, {"MDIMA",8}, {"NDIMB",32}, {"KWI",2}, {"VWM",1}, {"VWN",4}, {"STRM",1}, {"STRN",1}, {"SA",1}, {"SB",0} } }, - { "Tesla K40m", { {"MWG",32}, {"NWG",32}, {"KWG",16}, {"MDIMC",32}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",32}, {"KWI",8}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",0}, {"SB",1} } }, - { kDefaultDevice, { {"MWG",16}, {"NWG",32}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",8}, {"KWI",2}, {"VWM",1}, {"VWN",4}, {"STRM",1}, {"STRN",0}, {"SA",0}, {"SB",0} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"MWG",128}, {"NWG",32}, {"KWG",16}, {"MDIMC",32}, {"NDIMC",8}, {"MDIMA",32}, {"NDIMB",16}, {"KWI",8}, {"VWM",2}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",0}, {"SB",0} } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"KWG",16}, {"KWI",8}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, + { "default", { {"KWG",16}, {"KWI",8}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"MWG",32}, {"NWG",32}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",16}, {"KWI",1}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",0}, {"SB",0} } }, + kDeviceTypeAll, "default", { + { "default", { {"KWG",16}, {"KWI",8}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, } }, } }; + // ================================================================================================= } // namespace clblast diff --git a/include/internal/database/xgemv.h b/include/internal/database/xgemv.h index c315500f..ae9fbf30 100644 --- a/include/internal/database/xgemv.h +++ b/include/internal/database/xgemv.h @@ -5,9 +5,9 @@ // width of 100 characters per line. // // Author(s): -// Cedric Nugteren <www.cedricnugteren.nl> +// Database generator <database.py> // -// This file populates the database with best-found tuning parameters for the Xgemv kernels. +// This file populates the database with best-found tuning parameters for the 'Xgemv' kernels. // // ================================================================================================= @@ -16,26 +16,21 @@ namespace clblast { const Database::DatabaseEntry Database::XgemvSingle = { "Xgemv", Precision::kSingle, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, - { "Tesla K20m", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, - { "Tesla K40m", { {"WGS1",256}, {"WPT1",1}, {"WGS2",256}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",4} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Iris", { {"WGS1",64}, {"WPT1",2}, {"VW2",1}, {"WGS2",128}, {"WPT2",2}, {"VW3",4}, {"WGS3",64}, {"WPT3",8} } }, + { "default", { {"WGS1",64}, {"WPT1",2}, {"VW2",1}, {"WGS2",128}, {"WPT2",2}, {"VW3",4}, {"WGS3",64}, {"WPT3",8} } }, } }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { - { "Iris", { {"WGS1",256}, {"WPT1",2}, {"WGS2",64}, {"WPT2",4}, {"VW2",4}, {"WGS3",256}, {"WPT3",2}, {"VW3",8} } }, + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",256}, {"WPT2",1}, {"VW3",1}, {"WGS3",128}, {"WPT3",1} } }, + { "default", { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",256}, {"WPT2",1}, {"VW3",1}, {"WGS3",128}, {"WPT3",1} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, + kDeviceTypeAll, "default", { + { "default", { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",128}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, } }, } @@ -43,55 +38,35 @@ const Database::DatabaseEntry Database::XgemvSingle = { // ================================================================================================= -const Database::DatabaseEntry Database::XgemvDouble = { - "Xgemv", Precision::kDouble, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, - { "Tesla K20m", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, - { "Tesla K40m", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, - } - }, +const Database::DatabaseEntry Database::XgemvComplexSingle = { + "Xgemv", Precision::kComplexSingle, { { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { + kDeviceTypeGPU, "Intel", { + { "Iris", { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, + { "default", { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, + kDeviceTypeAll, "default", { + { "default", { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, } }, } }; + // ================================================================================================= -const Database::DatabaseEntry Database::XgemvComplexSingle = { - "Xgemv", Precision::kComplexSingle, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, - { "Tesla K20m", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, - { "Tesla K40m", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { - { "Iris", { {"WGS1",256}, {"WPT1",1}, {"WGS2",64}, {"WPT2",4}, {"VW2",2}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, +const Database::DatabaseEntry Database::XgemvDouble = { + "Xgemv", Precision::kDouble, { + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",256}, {"WPT2",1}, {"VW3",1}, {"WGS3",256}, {"WPT3",1} } }, + { "default", { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",256}, {"WPT2",1}, {"VW3",1}, {"WGS3",256}, {"WPT3",1} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, + kDeviceTypeAll, "default", { + { "default", { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",256}, {"WPT2",1}, {"VW3",1}, {"WGS3",256}, {"WPT3",1} } }, } }, } @@ -101,25 +76,9 @@ const Database::DatabaseEntry Database::XgemvComplexSingle = { const Database::DatabaseEntry Database::XgemvComplexDouble = { "Xgemv", Precision::kComplexDouble, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, - { "Tesla K20m", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, - { "Tesla K40m", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { - } - }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, + kDeviceTypeAll, "default", { + { "default", { {"WGS1",128}, {"WPT1",1}, {"VW2",1}, {"WGS2",128}, {"WPT2",1}, {"VW3",1}, {"WGS3",128}, {"WPT3",1} } }, } }, } |