diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2016-09-21 21:32:18 +0200 |
---|---|---|
committer | Cedric Nugteren <web@cedricnugteren.nl> | 2016-09-21 21:32:18 +0200 |
commit | 6aa652d6ea2389744195ae5cd19321325b2d71aa (patch) | |
tree | 58243cb4fbebb094c543028124412718cdeb7c97 /src/database | |
parent | 5004a435ff984bba0dff0147a5c4f6a04d703562 (diff) | |
parent | b1929d8ce7022cacbd1812d62098ebd0681bc1ef (diff) |
Merge branch 'development' into gemm_direct
Diffstat (limited to 'src/database')
-rw-r--r-- | src/database/database.cpp | 4 | ||||
-rw-r--r-- | src/database/database.hpp | 4 | ||||
-rw-r--r-- | src/database/kernels/copy.hpp | 39 | ||||
-rw-r--r-- | src/database/kernels/pad.hpp | 41 | ||||
-rw-r--r-- | src/database/kernels/padtranspose.hpp | 33 | ||||
-rw-r--r-- | src/database/kernels/transpose.hpp | 39 | ||||
-rw-r--r-- | src/database/kernels/xaxpy.hpp | 45 | ||||
-rw-r--r-- | src/database/kernels/xdot.hpp | 33 | ||||
-rw-r--r-- | src/database/kernels/xgemm.hpp | 18 | ||||
-rw-r--r-- | src/database/kernels/xgemv.hpp | 25 | ||||
-rw-r--r-- | src/database/kernels/xgemv_fast.hpp | 15 | ||||
-rw-r--r-- | src/database/kernels/xgemv_fast_rot.hpp | 28 | ||||
-rw-r--r-- | src/database/kernels/xger.hpp | 41 |
13 files changed, 209 insertions, 156 deletions
diff --git a/src/database/database.cpp b/src/database/database.cpp index 38974b95..34c44a29 100644 --- a/src/database/database.cpp +++ b/src/database/database.cpp @@ -35,9 +35,9 @@ const std::vector<Database::DatabaseEntry> Database::database = { XdotHalf, XdotSingle, XdotDouble, XdotComplexSingle, XdotComplexDouble, XgemvHalf, XgemvSingle, XgemvDouble, XgemvComplexSingle, XgemvComplexDouble, XgemvFastHalf, XgemvFastSingle, XgemvFastDouble, XgemvFastComplexSingle, XgemvFastComplexDouble, - /* XgemvFastRotHalf, */ XgemvFastRotSingle, XgemvFastRotDouble, XgemvFastRotComplexSingle, XgemvFastRotComplexDouble, + XgemvFastRotHalf, XgemvFastRotSingle, XgemvFastRotDouble, XgemvFastRotComplexSingle, XgemvFastRotComplexDouble, XgerHalf, XgerSingle, XgerDouble, XgerComplexSingle, XgerComplexDouble, - /* XgemmHalf, */ XgemmSingle, XgemmDouble, XgemmComplexSingle, XgemmComplexDouble, + XgemmHalf, XgemmSingle, XgemmDouble, XgemmComplexSingle, XgemmComplexDouble, CopyHalf, CopySingle, CopyDouble, CopyComplexSingle, CopyComplexDouble, PadHalf, PadSingle, PadDouble, PadComplexSingle, PadComplexDouble, TransposeHalf, TransposeSingle, TransposeDouble, TransposeComplexSingle, TransposeComplexDouble, diff --git a/src/database/database.hpp b/src/database/database.hpp index 8d6d3863..a6ab49c5 100644 --- a/src/database/database.hpp +++ b/src/database/database.hpp @@ -72,9 +72,9 @@ class Database { static const DatabaseEntry XdotHalf, XdotSingle, XdotDouble, XdotComplexSingle, XdotComplexDouble; static const DatabaseEntry XgemvHalf, XgemvSingle, XgemvDouble, XgemvComplexSingle, XgemvComplexDouble; static const DatabaseEntry XgemvFastHalf, XgemvFastSingle, XgemvFastDouble, XgemvFastComplexSingle, XgemvFastComplexDouble; - static const DatabaseEntry /* XgemvFastRotHalf, */ XgemvFastRotSingle, XgemvFastRotDouble, XgemvFastRotComplexSingle, XgemvFastRotComplexDouble; + static const DatabaseEntry XgemvFastRotHalf, XgemvFastRotSingle, XgemvFastRotDouble, XgemvFastRotComplexSingle, XgemvFastRotComplexDouble; static const DatabaseEntry XgerHalf, XgerSingle, XgerDouble, XgerComplexSingle, XgerComplexDouble; - static const DatabaseEntry /* XgemmHalf, */ XgemmSingle, XgemmDouble, XgemmComplexSingle, XgemmComplexDouble; + static const DatabaseEntry XgemmHalf, XgemmSingle, XgemmDouble, XgemmComplexSingle, XgemmComplexDouble; static const DatabaseEntry CopyHalf, CopySingle, CopyDouble, CopyComplexSingle, CopyComplexDouble; static const DatabaseEntry PadHalf, PadSingle, PadDouble, PadComplexSingle, PadComplexDouble; static const DatabaseEntry TransposeHalf, TransposeSingle, TransposeDouble, TransposeComplexSingle, TransposeComplexDouble; diff --git a/src/database/kernels/copy.hpp b/src/database/kernels/copy.hpp index d592f110..a6b7dfe8 100644 --- a/src/database/kernels/copy.hpp +++ b/src/database/kernels/copy.hpp @@ -18,6 +18,7 @@ const Database::DatabaseEntry Database::CopyHalf = { "Copy", Precision::kHalf, { { // Intel GPUs kDeviceTypeGPU, "Intel", { + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"COPY_DIMX",8}, {"COPY_DIMY",16}, {"COPY_VW",8}, {"COPY_WPT",4} } }, { "Intel(R) HD Graphics Skylake ULT GT2", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",1} } }, { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",1} } }, } @@ -41,7 +42,7 @@ const Database::DatabaseEntry Database::CopySingle = { { "Oland", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",2} } }, { "Pitcairn", { {"COPY_DIMX",8}, {"COPY_DIMY",16}, {"COPY_VW",4}, {"COPY_WPT",1} } }, { "Tahiti", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",2} } }, - { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } }, + { "default", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",2} } }, } }, { // ARM GPUs @@ -61,11 +62,12 @@ const Database::DatabaseEntry Database::CopySingle = { { // Intel GPUs kDeviceTypeGPU, "Intel", { { "Intel(R) HD Graphics 530", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } }, + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"COPY_DIMX",32}, {"COPY_DIMY",16}, {"COPY_VW",4}, {"COPY_WPT",1} } }, + { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"COPY_DIMX",32}, {"COPY_DIMY",16}, {"COPY_VW",4}, {"COPY_WPT",1} } }, { "Intel(R) HD Graphics Skylake ULT GT2", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } }, { "Iris", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } }, { "Iris Pro", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",4} } }, - { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, + { "default", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } }, } }, { // Intel accelerators @@ -85,15 +87,15 @@ const Database::DatabaseEntry Database::CopySingle = { { "GeForce GTX 750 Ti", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } }, { "GeForce GTX 980", { {"COPY_DIMX",32}, {"COPY_DIMY",16}, {"COPY_VW",1}, {"COPY_WPT",1} } }, { "GeForce GTX TITAN", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",4} } }, - { "GeForce GTX TITAN X", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } }, + { "GeForce GTX TITAN X", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } }, { "Tesla K20m", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",4} } }, { "Tesla K40m", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",2} } }, - { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, + { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",16}, {"COPY_VW",4}, {"COPY_WPT",1} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, + { "default", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",2} } }, } }, } @@ -110,7 +112,7 @@ const Database::DatabaseEntry Database::CopyComplexSingle = { { "Oland", { {"COPY_DIMX",8}, {"COPY_DIMY",16}, {"COPY_VW",1}, {"COPY_WPT",1} } }, { "Pitcairn", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } }, { "Tahiti", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",2} } }, - { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, + { "default", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, } }, { // Intel CPUs @@ -118,17 +120,18 @@ const Database::DatabaseEntry Database::CopyComplexSingle = { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"COPY_DIMX",16}, {"COPY_DIMY",16}, {"COPY_VW",8}, {"COPY_WPT",1} } }, { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",2} } }, { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",8}, {"COPY_WPT",1} } }, - { "default", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } }, + { "default", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",8}, {"COPY_WPT",1} } }, } }, { // Intel GPUs kDeviceTypeGPU, "Intel", { { "Intel(R) HD Graphics 530", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } }, + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"COPY_DIMX",16}, {"COPY_DIMY",16}, {"COPY_VW",2}, {"COPY_WPT",2} } }, { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, { "Intel(R) HD Graphics Skylake ULT GT2", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",4} } }, { "Iris", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } }, { "Iris Pro", { {"COPY_DIMX",32}, {"COPY_DIMY",16}, {"COPY_VW",1}, {"COPY_WPT",4} } }, - { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, + { "default", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } }, } }, { // Intel accelerators @@ -149,12 +152,12 @@ const Database::DatabaseEntry Database::CopyComplexSingle = { { "GeForce GTX TITAN X", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, { "Tesla K20m", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",4} } }, { "Tesla K40m", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, - { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, + { "default", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, + { "default", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, } }, } @@ -171,7 +174,7 @@ const Database::DatabaseEntry Database::CopyDouble = { { "Oland", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",8} } }, { "Pitcairn", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, { "Tahiti", { {"COPY_DIMX",8}, {"COPY_DIMY",32}, {"COPY_VW",2}, {"COPY_WPT",1} } }, - { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, + { "default", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } }, } }, { // ARM GPUs @@ -185,7 +188,7 @@ const Database::DatabaseEntry Database::CopyDouble = { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",8}, {"COPY_WPT",1} } }, { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"COPY_DIMX",16}, {"COPY_DIMY",32}, {"COPY_VW",2}, {"COPY_WPT",1} } }, { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"COPY_DIMX",16}, {"COPY_DIMY",16}, {"COPY_VW",8}, {"COPY_WPT",1} } }, - { "default", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } }, + { "default", { {"COPY_DIMX",16}, {"COPY_DIMY",16}, {"COPY_VW",8}, {"COPY_WPT",1} } }, } }, { // Intel accelerators @@ -208,12 +211,12 @@ const Database::DatabaseEntry Database::CopyDouble = { { "GeForce GTX TITAN X", { {"COPY_DIMX",32}, {"COPY_DIMY",16}, {"COPY_VW",1}, {"COPY_WPT",1} } }, { "Tesla K20m", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } }, { "Tesla K40m", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",2} } }, - { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, + { "default", { {"COPY_DIMX",32}, {"COPY_DIMY",32}, {"COPY_VW",2}, {"COPY_WPT",1} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, + { "default", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } }, } }, } @@ -230,7 +233,7 @@ const Database::DatabaseEntry Database::CopyComplexDouble = { { "Oland", { {"COPY_DIMX",8}, {"COPY_DIMY",16}, {"COPY_VW",1}, {"COPY_WPT",1} } }, { "Pitcairn", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, { "Tahiti", { {"COPY_DIMX",8}, {"COPY_DIMY",16}, {"COPY_VW",1}, {"COPY_WPT",1} } }, - { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, + { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",16}, {"COPY_VW",1}, {"COPY_WPT",1} } }, } }, { // ARM GPUs @@ -244,7 +247,7 @@ const Database::DatabaseEntry Database::CopyComplexDouble = { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",8}, {"COPY_WPT",1} } }, { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"COPY_DIMX",32}, {"COPY_DIMY",32}, {"COPY_VW",8}, {"COPY_WPT",1} } }, { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",8}, {"COPY_WPT",1} } }, - { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",8}, {"COPY_WPT",1} } }, + { "default", { {"COPY_DIMX",32}, {"COPY_DIMY",32}, {"COPY_VW",8}, {"COPY_WPT",1} } }, } }, { // Intel accelerators @@ -272,7 +275,7 @@ const Database::DatabaseEntry Database::CopyComplexDouble = { }, { // Default kDeviceTypeAll, "default", { - { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, + { "default", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, } }, } diff --git a/src/database/kernels/pad.hpp b/src/database/kernels/pad.hpp index cd034f15..3cfabaf4 100644 --- a/src/database/kernels/pad.hpp +++ b/src/database/kernels/pad.hpp @@ -18,6 +18,7 @@ const Database::DatabaseEntry Database::PadHalf = { "Pad", Precision::kHalf, { { // Intel GPUs kDeviceTypeGPU, "Intel", { + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } }, { "Intel(R) HD Graphics Skylake ULT GT2", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, } @@ -41,7 +42,7 @@ const Database::DatabaseEntry Database::PadSingle = { { "Oland", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, { "Pitcairn", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, { "Tahiti", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, - { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, } }, { // ARM GPUs @@ -55,17 +56,18 @@ const Database::DatabaseEntry Database::PadSingle = { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } }, { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"PAD_DIMX",16}, {"PAD_DIMY",32}, {"PAD_WPTX",4}, {"PAD_WPTY",4} } }, { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } }, - { "default", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",4}, {"PAD_WPTY",2} } }, } }, { // Intel GPUs kDeviceTypeGPU, "Intel", { { "Intel(R) HD Graphics 530", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",4} } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",4} } }, + { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, { "Intel(R) HD Graphics Skylake ULT GT2", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, { "Iris", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, { "Iris Pro", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, - { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, } }, { // Intel accelerators @@ -88,12 +90,12 @@ const Database::DatabaseEntry Database::PadSingle = { { "GeForce GTX TITAN X", { {"PAD_DIMX",16}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, { "Tesla K20m", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, { "Tesla K40m", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, - { "default", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, } }, } @@ -110,7 +112,7 @@ const Database::DatabaseEntry Database::PadComplexSingle = { { "Oland", { {"PAD_DIMX",8}, {"PAD_DIMY",32}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, { "Pitcairn", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, { "Tahiti", { {"PAD_DIMX",16}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, - { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, } }, { // ARM GPUs @@ -124,17 +126,18 @@ const Database::DatabaseEntry Database::PadComplexSingle = { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",2} } }, { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"PAD_DIMX",32}, {"PAD_DIMY",32}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } }, { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } }, - { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } }, } }, { // Intel GPUs kDeviceTypeGPU, "Intel", { { "Intel(R) HD Graphics 530", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, { "Intel(R) HD Graphics Skylake ULT GT2", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",4} } }, { "Iris", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",2}, {"PAD_WPTY",4} } }, { "Iris Pro", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, - { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",4} } }, } }, { // Intel accelerators @@ -157,12 +160,12 @@ const Database::DatabaseEntry Database::PadComplexSingle = { { "GeForce GTX TITAN X", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, { "Tesla K20m", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, { "Tesla K40m", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, - { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, } }, } @@ -179,7 +182,7 @@ const Database::DatabaseEntry Database::PadDouble = { { "Oland", { {"PAD_DIMX",8}, {"PAD_DIMY",32}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, { "Pitcairn", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, { "Tahiti", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, - { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, } }, { // ARM GPUs @@ -193,7 +196,7 @@ const Database::DatabaseEntry Database::PadDouble = { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } }, { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"PAD_DIMX",32}, {"PAD_DIMY",32}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } }, { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, - { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } }, } }, { // Intel accelerators @@ -216,12 +219,12 @@ const Database::DatabaseEntry Database::PadDouble = { { "GeForce GTX TITAN X", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, { "Tesla K20m", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, { "Tesla K40m", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, - { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, } }, } @@ -238,7 +241,7 @@ const Database::DatabaseEntry Database::PadComplexDouble = { { "Oland", { {"PAD_DIMX",8}, {"PAD_DIMY",16}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, { "Pitcairn", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, { "Tahiti", { {"PAD_DIMX",8}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, - { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, } }, { // ARM GPUs @@ -252,7 +255,7 @@ const Database::DatabaseEntry Database::PadComplexDouble = { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"PAD_DIMX",16}, {"PAD_DIMY",32}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } }, { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, - { "default", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } }, } }, { // Intel accelerators @@ -275,12 +278,12 @@ const Database::DatabaseEntry Database::PadComplexDouble = { { "GeForce GTX TITAN X", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, { "Tesla K20m", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, { "Tesla K40m", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, - { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, } }, } diff --git a/src/database/kernels/padtranspose.hpp b/src/database/kernels/padtranspose.hpp index c2034c3e..88bd4ea7 100644 --- a/src/database/kernels/padtranspose.hpp +++ b/src/database/kernels/padtranspose.hpp @@ -18,6 +18,7 @@ const Database::DatabaseEntry Database::PadtransposeHalf = { "Padtranspose", Precision::kHalf, { { // Intel GPUs kDeviceTypeGPU, "Intel", { + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",1} } }, { "Intel(R) HD Graphics Skylake ULT GT2", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",1} } }, { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",1} } }, } @@ -55,12 +56,13 @@ const Database::DatabaseEntry Database::PadtransposeSingle = { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",8} } }, { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"PADTRA_PAD",0}, {"PADTRA_TILE",32}, {"PADTRA_WPT",1} } }, - { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",1} } }, + { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",8} } }, } }, { // Intel GPUs kDeviceTypeGPU, "Intel", { { "Intel(R) HD Graphics 530", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",4} } }, { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, { "Intel(R) HD Graphics Skylake ULT GT2", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, { "Iris", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, @@ -88,12 +90,12 @@ const Database::DatabaseEntry Database::PadtransposeSingle = { { "GeForce GTX TITAN X", { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",1} } }, { "Tesla K20m", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, { "Tesla K40m", { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",2} } }, - { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, + { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",2} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",1} } }, + { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, } }, } @@ -110,7 +112,7 @@ const Database::DatabaseEntry Database::PadtransposeComplexSingle = { { "Oland", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } }, { "Pitcairn", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } }, { "Tahiti", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, - { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",2} } }, + { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } }, } }, { // ARM GPUs @@ -130,11 +132,12 @@ const Database::DatabaseEntry Database::PadtransposeComplexSingle = { { // Intel GPUs kDeviceTypeGPU, "Intel", { { "Intel(R) HD Graphics 530", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, { "Intel(R) HD Graphics Skylake ULT GT2", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",4} } }, { "Iris", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, { "Iris Pro", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, - { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, + { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, } }, { // Intel accelerators @@ -157,12 +160,12 @@ const Database::DatabaseEntry Database::PadtransposeComplexSingle = { { "GeForce GTX TITAN X", { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",1} } }, { "Tesla K20m", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, { "Tesla K40m", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, - { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, + { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",1} } }, + { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, } }, } @@ -179,7 +182,7 @@ const Database::DatabaseEntry Database::PadtransposeDouble = { { "Oland", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",4} } }, { "Pitcairn", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } }, { "Tahiti", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, - { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",2} } }, + { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",4} } }, } }, { // ARM GPUs @@ -193,7 +196,7 @@ const Database::DatabaseEntry Database::PadtransposeDouble = { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"PADTRA_PAD",1}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } }, { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",8} } }, { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",1} } }, - { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",1} } }, + { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } }, } }, { // Intel accelerators @@ -216,12 +219,12 @@ const Database::DatabaseEntry Database::PadtransposeDouble = { { "GeForce GTX TITAN X", { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",1} } }, { "Tesla K20m", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, { "Tesla K40m", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, - { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, + { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",1} } }, + { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, } }, } @@ -238,7 +241,7 @@ const Database::DatabaseEntry Database::PadtransposeComplexDouble = { { "Oland", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } }, { "Pitcairn", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } }, { "Tahiti", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",2} } }, - { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",2} } }, + { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } }, } }, { // ARM GPUs @@ -252,7 +255,7 @@ const Database::DatabaseEntry Database::PadtransposeComplexDouble = { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"PADTRA_PAD",1}, {"PADTRA_TILE",8}, {"PADTRA_WPT",2} } }, { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"PADTRA_PAD",1}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } }, { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"PADTRA_PAD",1}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } }, - { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",8}, {"PADTRA_WPT",2} } }, + { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } }, } }, { // Intel accelerators @@ -275,12 +278,12 @@ const Database::DatabaseEntry Database::PadtransposeComplexDouble = { { "GeForce GTX TITAN X", { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",1} } }, { "Tesla K20m", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, { "Tesla K40m", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, - { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, + { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",1} } }, + { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",2} } }, } }, } diff --git a/src/database/kernels/transpose.hpp b/src/database/kernels/transpose.hpp index 8e852c4b..0e1b608e 100644 --- a/src/database/kernels/transpose.hpp +++ b/src/database/kernels/transpose.hpp @@ -18,6 +18,7 @@ const Database::DatabaseEntry Database::TransposeHalf = { "Transpose", Precision::kHalf, { { // Intel GPUs kDeviceTypeGPU, "Intel", { + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",8} } }, { "Intel(R) HD Graphics Skylake ULT GT2", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, { "default", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, } @@ -41,7 +42,7 @@ const Database::DatabaseEntry Database::TransposeSingle = { { "Oland", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } }, { "Pitcairn", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } }, { "Tahiti", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } }, - { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } }, + { "default", { {"TRA_DIM",4}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } }, } }, { // ARM GPUs @@ -61,11 +62,12 @@ const Database::DatabaseEntry Database::TransposeSingle = { { // Intel GPUs kDeviceTypeGPU, "Intel", { { "Intel(R) HD Graphics 530", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, + { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, { "Intel(R) HD Graphics Skylake ULT GT2", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } }, { "Iris", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, { "Iris Pro", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, - { "default", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, + { "default", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, } }, { // Intel accelerators @@ -88,12 +90,12 @@ const Database::DatabaseEntry Database::TransposeSingle = { { "GeForce GTX TITAN X", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, { "Tesla K20m", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, { "Tesla K40m", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, - { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, + { "default", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, + { "default", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } }, } }, } @@ -110,7 +112,7 @@ const Database::DatabaseEntry Database::TransposeComplexSingle = { { "Oland", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } }, { "Pitcairn", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } }, { "Tahiti", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } }, - { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } }, + { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } }, } }, { // ARM GPUs @@ -124,17 +126,18 @@ const Database::DatabaseEntry Database::TransposeComplexSingle = { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } }, { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"TRA_DIM",4}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",8} } }, { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, - { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } }, + { "default", { {"TRA_DIM",4}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",8} } }, } }, { // Intel GPUs kDeviceTypeGPU, "Intel", { { "Intel(R) HD Graphics 530", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } }, + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } }, { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } }, { "Intel(R) HD Graphics Skylake ULT GT2", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } }, { "Iris", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } }, { "Iris Pro", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } }, - { "default", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } }, + { "default", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } }, } }, { // NVIDIA GPUs @@ -151,12 +154,12 @@ const Database::DatabaseEntry Database::TransposeComplexSingle = { { "GeForce GTX TITAN X", { {"TRA_DIM",32}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, { "Tesla K20m", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, { "Tesla K40m", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, - { "default", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, + { "default", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, + { "default", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, } }, } @@ -173,7 +176,7 @@ const Database::DatabaseEntry Database::TransposeDouble = { { "Oland", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } }, { "Pitcairn", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } }, { "Tahiti", { {"TRA_DIM",4}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } }, - { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } }, + { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } }, } }, { // ARM GPUs @@ -187,7 +190,7 @@ const Database::DatabaseEntry Database::TransposeDouble = { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"TRA_DIM",4}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",8} } }, { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",8} } }, - { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, + { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",8} } }, } }, { // Intel accelerators @@ -210,12 +213,12 @@ const Database::DatabaseEntry Database::TransposeDouble = { { "GeForce GTX TITAN X", { {"TRA_DIM",32}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, { "Tesla K20m", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } }, { "Tesla K40m", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } }, - { "default", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, + { "default", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, + { "default", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } }, } }, } @@ -232,7 +235,7 @@ const Database::DatabaseEntry Database::TransposeComplexDouble = { { "Oland", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } }, { "Pitcairn", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } }, { "Tahiti", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } }, - { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } }, + { "default", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } }, } }, { // ARM GPUs @@ -246,7 +249,7 @@ const Database::DatabaseEntry Database::TransposeComplexDouble = { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"TRA_DIM",4}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } }, - { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } }, + { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, } }, { // NVIDIA GPUs @@ -263,12 +266,12 @@ const Database::DatabaseEntry Database::TransposeComplexDouble = { { "GeForce GTX TITAN X", { {"TRA_DIM",32}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, { "Tesla K20m", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, { "Tesla K40m", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, - { "default", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, + { "default", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, + { "default", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } }, } }, } diff --git a/src/database/kernels/xaxpy.hpp b/src/database/kernels/xaxpy.hpp index 905ee084..9c1bcd99 100644 --- a/src/database/kernels/xaxpy.hpp +++ b/src/database/kernels/xaxpy.hpp @@ -18,13 +18,14 @@ const Database::DatabaseEntry Database::XaxpyHalf = { "Xaxpy", Precision::kHalf, { { // Intel GPUs kDeviceTypeGPU, "Intel", { + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, { "Intel(R) HD Graphics Skylake ULT GT2", { {"VW",4}, {"WGS",512}, {"WPT",8} } }, - { "default", { {"VW",4}, {"WGS",512}, {"WPT",8} } }, + { "default", { {"VW",8}, {"WGS",64}, {"WPT",1} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"VW",4}, {"WGS",512}, {"WPT",8} } }, + { "default", { {"VW",8}, {"WGS",64}, {"WPT",1} } }, } }, } @@ -41,7 +42,7 @@ const Database::DatabaseEntry Database::XaxpySingle = { { "Oland", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, { "Pitcairn", { {"VW",2}, {"WGS",128}, {"WPT",1} } }, { "Tahiti", { {"VW",2}, {"WGS",64}, {"WPT",1} } }, - { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, + { "default", { {"VW",2}, {"WGS",256}, {"WPT",1} } }, } }, { // ARM GPUs @@ -55,13 +56,14 @@ const Database::DatabaseEntry Database::XaxpySingle = { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"VW",1}, {"WGS",512}, {"WPT",1} } }, { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"VW",4}, {"WGS",256}, {"WPT",1} } }, { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, - { "default", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, + { "default", { {"VW",2}, {"WGS",256}, {"WPT",1} } }, } }, { // Intel GPUs kDeviceTypeGPU, "Intel", { { "Intel(R) HD Graphics 530", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"VW",8}, {"WGS",256}, {"WPT",1} } }, + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"VW",1}, {"WGS",256}, {"WPT",1} } }, + { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, { "Intel(R) HD Graphics Skylake ULT GT2", { {"VW",1}, {"WGS",512}, {"WPT",2} } }, { "Iris", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, { "Iris Pro", { {"VW",1}, {"WGS",128}, {"WPT",2} } }, @@ -77,10 +79,10 @@ const Database::DatabaseEntry Database::XaxpySingle = { { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { { "GRID K520", { {"VW",2}, {"WGS",64}, {"WPT",1} } }, - { "GeForce GTX 1070", { {"VW",2}, {"WGS",64}, {"WPT",1} } }, - { "GeForce GTX 480", { {"VW",4}, {"WGS",64}, {"WPT",1} } }, + { "GeForce GTX 1070", { {"VW",1}, {"WGS",64}, {"WPT",4} } }, + { "GeForce GTX 480", { {"VW",2}, {"WGS",128}, {"WPT",1} } }, { "GeForce GTX 670", { {"VW",2}, {"WGS",64}, {"WPT",1} } }, - { "GeForce GTX 680", { {"VW",2}, {"WGS",64}, {"WPT",1} } }, + { "GeForce GTX 680", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, { "GeForce GTX 750", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, { "GeForce GTX 750 Ti", { {"VW",1}, {"WGS",1024}, {"WPT",1} } }, { "GeForce GTX 980", { {"VW",1}, {"WGS",1024}, {"WPT",1} } }, @@ -88,12 +90,12 @@ const Database::DatabaseEntry Database::XaxpySingle = { { "GeForce GTX TITAN X", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, { "Tesla K20m", { {"VW",4}, {"WGS",128}, {"WPT",1} } }, { "Tesla K40m", { {"VW",4}, {"WGS",128}, {"WPT",1} } }, - { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, + { "default", { {"VW",4}, {"WGS",64}, {"WPT",1} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, + { "default", { {"VW",4}, {"WGS",64}, {"WPT",1} } }, } }, } @@ -110,7 +112,7 @@ const Database::DatabaseEntry Database::XaxpyComplexSingle = { { "Oland", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, { "Pitcairn", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, { "Tahiti", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, - { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, + { "default", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, } }, { // ARM GPUs @@ -124,17 +126,18 @@ const Database::DatabaseEntry Database::XaxpyComplexSingle = { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"VW",4}, {"WGS",256}, {"WPT",1} } }, { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"VW",1}, {"WGS",1024}, {"WPT",2} } }, { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"VW",2}, {"WGS",1024}, {"WPT",1} } }, - { "default", { {"VW",1}, {"WGS",256}, {"WPT",1} } }, + { "default", { {"VW",8}, {"WGS",1024}, {"WPT",1} } }, } }, { // Intel GPUs kDeviceTypeGPU, "Intel", { { "Intel(R) HD Graphics 530", { {"VW",4}, {"WGS",64}, {"WPT",2} } }, + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, { "Intel(R) HD Graphics Skylake ULT GT2", { {"VW",2}, {"WGS",512}, {"WPT",1} } }, { "Iris", { {"VW",2}, {"WGS",128}, {"WPT",1} } }, { "Iris Pro", { {"VW",1}, {"WGS",256}, {"WPT",8} } }, - { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, + { "default", { {"VW",1}, {"WGS",256}, {"WPT",2} } }, } }, { // Intel accelerators @@ -157,12 +160,12 @@ const Database::DatabaseEntry Database::XaxpyComplexSingle = { { "GeForce GTX TITAN X", { {"VW",1}, {"WGS",512}, {"WPT",1} } }, { "Tesla K20m", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, { "Tesla K40m", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, - { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, + { "default", { {"VW",1}, {"WGS",256}, {"WPT",1} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, + { "default", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, } }, } @@ -193,7 +196,7 @@ const Database::DatabaseEntry Database::XaxpyDouble = { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"VW",1}, {"WGS",1024}, {"WPT",1} } }, { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"VW",8}, {"WGS",64}, {"WPT",1} } }, { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"VW",8}, {"WGS",2048}, {"WPT",1} } }, - { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, + { "default", { {"VW",8}, {"WGS",512}, {"WPT",1} } }, } }, { // Intel accelerators @@ -206,7 +209,7 @@ const Database::DatabaseEntry Database::XaxpyDouble = { kDeviceTypeGPU, "NVIDIA", { { "GRID K520", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, { "GeForce GTX 1070", { {"VW",1}, {"WGS",64}, {"WPT",8} } }, - { "GeForce GTX 480", { {"VW",2}, {"WGS",64}, {"WPT",1} } }, + { "GeForce GTX 480", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, { "GeForce GTX 670", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, { "GeForce GTX 680", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, { "GeForce GTX 750", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, @@ -216,7 +219,7 @@ const Database::DatabaseEntry Database::XaxpyDouble = { { "GeForce GTX TITAN X", { {"VW",1}, {"WGS",512}, {"WPT",1} } }, { "Tesla K20m", { {"VW",2}, {"WGS",128}, {"WPT",1} } }, { "Tesla K40m", { {"VW",2}, {"WGS",128}, {"WPT",1} } }, - { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, + { "default", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, } }, { // Default @@ -238,7 +241,7 @@ const Database::DatabaseEntry Database::XaxpyComplexDouble = { { "Oland", { {"VW",1}, {"WGS",256}, {"WPT",1} } }, { "Pitcairn", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, { "Tahiti", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, - { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, + { "default", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, } }, { // ARM GPUs @@ -252,7 +255,7 @@ const Database::DatabaseEntry Database::XaxpyComplexDouble = { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"VW",8}, {"WGS",128}, {"WPT",1} } }, { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"VW",8}, {"WGS",512}, {"WPT",1} } }, { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"VW",1}, {"WGS",256}, {"WPT",1} } }, - { "default", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, + { "default", { {"VW",4}, {"WGS",1024}, {"WPT",1} } }, } }, { // Intel accelerators @@ -280,7 +283,7 @@ const Database::DatabaseEntry Database::XaxpyComplexDouble = { }, { // Default kDeviceTypeAll, "default", { - { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, + { "default", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, } }, } diff --git a/src/database/kernels/xdot.hpp b/src/database/kernels/xdot.hpp index e36dd8ca..987a990d 100644 --- a/src/database/kernels/xdot.hpp +++ b/src/database/kernels/xdot.hpp @@ -18,6 +18,7 @@ const Database::DatabaseEntry Database::XdotHalf = { "Xdot", Precision::kHalf, { { // Intel GPUs kDeviceTypeGPU, "Intel", { + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"WGS1",256}, {"WGS2",32} } }, { "Intel(R) HD Graphics Skylake ULT GT2", { {"WGS1",32}, {"WGS2",32} } }, { "default", { {"WGS1",32}, {"WGS2",32} } }, } @@ -37,7 +38,6 @@ const Database::DatabaseEntry Database::XdotSingle = { { // AMD GPUs kDeviceTypeGPU, "AMD", { { "AMD Radeon R9 M370X Compute Engine", { {"WGS1",128}, {"WGS2",32} } }, - { "Hawaii", { {"WGS1",256}, {"WGS2",32} } }, { "Oland", { {"WGS1",256}, {"WGS2",32} } }, { "Pitcairn", { {"WGS1",128}, {"WGS2",32} } }, { "Tahiti", { {"WGS1",128}, {"WGS2",32} } }, @@ -53,10 +53,11 @@ const Database::DatabaseEntry Database::XdotSingle = { { // Intel GPUs kDeviceTypeGPU, "Intel", { { "Intel(R) HD Graphics 530", { {"WGS1",64}, {"WGS2",32} } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"WGS1",32}, {"WGS2",32} } }, + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"WGS1",256}, {"WGS2",32} } }, + { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"WGS1",64}, {"WGS2",32} } }, { "Intel(R) HD Graphics Skylake ULT GT2", { {"WGS1",64}, {"WGS2",32} } }, { "Iris Pro", { {"WGS1",512}, {"WGS2",64} } }, - { "default", { {"WGS1",32}, {"WGS2",32} } }, + { "default", { {"WGS1",64}, {"WGS2",32} } }, } }, { // NVIDIA GPUs @@ -70,12 +71,12 @@ const Database::DatabaseEntry Database::XdotSingle = { { "GeForce GTX 980", { {"WGS1",256}, {"WGS2",32} } }, { "GeForce GTX TITAN X", { {"WGS1",256}, {"WGS2",32} } }, { "Tesla K20m", { {"WGS1",1024}, {"WGS2",32} } }, - { "default", { {"WGS1",128}, {"WGS2",32} } }, + { "default", { {"WGS1",256}, {"WGS2",256} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"WGS1",32}, {"WGS2",32} } }, + { "default", { {"WGS1",256}, {"WGS2",32} } }, } }, } @@ -88,11 +89,10 @@ const Database::DatabaseEntry Database::XdotComplexSingle = { { // AMD GPUs kDeviceTypeGPU, "AMD", { { "AMD Radeon R9 M370X Compute Engine", { {"WGS1",64}, {"WGS2",32} } }, - { "Hawaii", { {"WGS1",256}, {"WGS2",32} } }, { "Oland", { {"WGS1",128}, {"WGS2",32} } }, { "Pitcairn", { {"WGS1",256}, {"WGS2",32} } }, { "Tahiti", { {"WGS1",64}, {"WGS2",32} } }, - { "default", { {"WGS1",64}, {"WGS2",32} } }, + { "default", { {"WGS1",128}, {"WGS2",32} } }, } }, { // Intel CPUs @@ -104,6 +104,7 @@ const Database::DatabaseEntry Database::XdotComplexSingle = { { // Intel GPUs kDeviceTypeGPU, "Intel", { { "Intel(R) HD Graphics 530", { {"WGS1",256}, {"WGS2",32} } }, + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"WGS1",256}, {"WGS2",32} } }, { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"WGS1",32}, {"WGS2",32} } }, { "Intel(R) HD Graphics Skylake ULT GT2", { {"WGS1",32}, {"WGS2",32} } }, { "Iris Pro", { {"WGS1",32}, {"WGS2",32} } }, @@ -121,12 +122,12 @@ const Database::DatabaseEntry Database::XdotComplexSingle = { { "GeForce GTX 980", { {"WGS1",256}, {"WGS2",64} } }, { "GeForce GTX TITAN X", { {"WGS1",256}, {"WGS2",32} } }, { "Tesla K20m", { {"WGS1",512}, {"WGS2",32} } }, - { "default", { {"WGS1",64}, {"WGS2",32} } }, + { "default", { {"WGS1",512}, {"WGS2",64} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"WGS1",32}, {"WGS2",32} } }, + { "default", { {"WGS1",256}, {"WGS2",32} } }, } }, } @@ -139,11 +140,10 @@ const Database::DatabaseEntry Database::XdotDouble = { { // AMD GPUs kDeviceTypeGPU, "AMD", { { "AMD Radeon R9 M370X Compute Engine", { {"WGS1",64}, {"WGS2",128} } }, - { "Hawaii", { {"WGS1",256}, {"WGS2",32} } }, { "Oland", { {"WGS1",256}, {"WGS2",32} } }, { "Pitcairn", { {"WGS1",128}, {"WGS2",32} } }, { "Tahiti", { {"WGS1",256}, {"WGS2",32} } }, - { "default", { {"WGS1",64}, {"WGS2",32} } }, + { "default", { {"WGS1",128}, {"WGS2",32} } }, } }, { // Intel CPUs @@ -163,12 +163,12 @@ const Database::DatabaseEntry Database::XdotDouble = { { "GeForce GTX 980", { {"WGS1",128}, {"WGS2",32} } }, { "GeForce GTX TITAN X", { {"WGS1",256}, {"WGS2",32} } }, { "Tesla K20m", { {"WGS1",512}, {"WGS2",32} } }, - { "default", { {"WGS1",64}, {"WGS2",32} } }, + { "default", { {"WGS1",256}, {"WGS2",64} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"WGS1",64}, {"WGS2",32} } }, + { "default", { {"WGS1",128}, {"WGS2",64} } }, } }, } @@ -181,11 +181,10 @@ const Database::DatabaseEntry Database::XdotComplexDouble = { { // AMD GPUs kDeviceTypeGPU, "AMD", { { "AMD Radeon R9 M370X Compute Engine", { {"WGS1",64}, {"WGS2",32} } }, - { "Hawaii", { {"WGS1",256}, {"WGS2",32} } }, { "Oland", { {"WGS1",256}, {"WGS2",32} } }, { "Pitcairn", { {"WGS1",256}, {"WGS2",32} } }, { "Tahiti", { {"WGS1",256}, {"WGS2",32} } }, - { "default", { {"WGS1",64}, {"WGS2",32} } }, + { "default", { {"WGS1",256}, {"WGS2",32} } }, } }, { // Intel CPUs @@ -205,12 +204,12 @@ const Database::DatabaseEntry Database::XdotComplexDouble = { { "GeForce GTX 980", { {"WGS1",64}, {"WGS2",32} } }, { "GeForce GTX TITAN X", { {"WGS1",128}, {"WGS2",32} } }, { "Tesla K20m", { {"WGS1",128}, {"WGS2",32} } }, - { "default", { {"WGS1",64}, {"WGS2",32} } }, + { "default", { {"WGS1",128}, {"WGS2",64} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"WGS1",64}, {"WGS2",32} } }, + { "default", { {"WGS1",256}, {"WGS2",64} } }, } }, } diff --git a/src/database/kernels/xgemm.hpp b/src/database/kernels/xgemm.hpp index 61b7ff05..d19c55b5 100644 --- a/src/database/kernels/xgemm.hpp +++ b/src/database/kernels/xgemm.hpp @@ -14,6 +14,18 @@ namespace clblast { // ================================================================================================= +const Database::DatabaseEntry Database::XgemmHalf = { + "Xgemm", Precision::kHalf, { + { // Default + kDeviceTypeAll, "default", { + { "default", { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",16}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, + } + }, + } +}; + +// ================================================================================================= + const Database::DatabaseEntry Database::XgemmSingle = { "Xgemm", Precision::kSingle, { { // AMD GPUs @@ -43,6 +55,7 @@ const Database::DatabaseEntry Database::XgemmSingle = { { // Intel GPUs kDeviceTypeGPU, "Intel", { { "Intel(R) HD Graphics 530", { {"KWG",32}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",128}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",4}, {"VWN",2} } }, + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"KWG",32}, {"KWI",8}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",4}, {"VWN",2} } }, { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"KWG",16}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",16}, {"NWG",128}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",2}, {"VWN",4} } }, { "Intel(R) HD Graphics Skylake ULT GT2", { {"KWG",32}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",128}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",1}, {"VWM",1}, {"VWN",8} } }, { "Iris", { {"KWG",16}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",128}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",4}, {"VWN",1} } }, @@ -75,7 +88,7 @@ const Database::DatabaseEntry Database::XgemmSingle = { }, { // Default kDeviceTypeAll, "default", { - { "default", { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, + { "default", { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",16}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, } }, } @@ -112,6 +125,7 @@ const Database::DatabaseEntry Database::XgemmComplexSingle = { { // Intel GPUs kDeviceTypeGPU, "Intel", { { "Intel(R) HD Graphics 530", { {"KWG",16}, {"KWI",8}, {"MDIMA",8}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",32}, {"NDIMC",8}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",2}, {"VWN",1} } }, + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"KWG",16}, {"KWI",8}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",4}, {"VWN",4} } }, { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"KWG",32}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",2}, {"VWN",1} } }, { "Intel(R) HD Graphics Skylake ULT GT2", { {"KWG",32}, {"KWI",8}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",32}, {"SA",1}, {"SB",0}, {"STRM",0}, {"STRN",1}, {"VWM",4}, {"VWN",1} } }, { "Iris", { {"KWG",32}, {"KWI",8}, {"MDIMA",32}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, @@ -156,7 +170,7 @@ const Database::DatabaseEntry Database::XgemmDouble = { "Xgemm", Precision::kDouble, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { {"KWG",32}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",32}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",1}, {"VWM",2}, {"VWN",8} } }, + { "AMD Radeon R9 M370X Compute Engine", { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",4}, {"VWN",4} } }, { "Hawaii", { {"KWG",16}, {"KWI",8}, {"MDIMA",32}, {"MDIMC",8}, {"MWG",128}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",32}, {"SA",0}, {"SB",1}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",4} } }, { "Oland", { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",16}, {"NDIMC",8}, {"NWG",16}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",1}, {"VWM",1}, {"VWN",1} } }, { "Pitcairn", { {"KWG",32}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",16}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",2} } }, diff --git a/src/database/kernels/xgemv.hpp b/src/database/kernels/xgemv.hpp index 6d680b06..e5e8845e 100644 --- a/src/database/kernels/xgemv.hpp +++ b/src/database/kernels/xgemv.hpp @@ -18,13 +18,14 @@ const Database::DatabaseEntry Database::XgemvHalf = { "Xgemv", Precision::kHalf, { { // Intel GPUs kDeviceTypeGPU, "Intel", { + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"WGS1",64}, {"WPT1",1} } }, { "Intel(R) HD Graphics Skylake ULT GT2", { {"WGS1",128}, {"WPT1",1} } }, - { "default", { {"WGS1",128}, {"WPT1",1} } }, + { "default", { {"WGS1",64}, {"WPT1",1} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"WGS1",128}, {"WPT1",1} } }, + { "default", { {"WGS1",64}, {"WPT1",1} } }, } }, } @@ -48,13 +49,14 @@ const Database::DatabaseEntry Database::XgemvSingle = { kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"WGS1",64}, {"WPT1",1} } }, { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"WGS1",64}, {"WPT1",4} } }, - { "default", { {"WGS1",64}, {"WPT1",1} } }, + { "default", { {"WGS1",64}, {"WPT1",4} } }, } }, { // Intel GPUs kDeviceTypeGPU, "Intel", { { "Intel(R) HD Graphics 530", { {"WGS1",256}, {"WPT1",1} } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"WGS1",256}, {"WPT1",1} } }, + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"WGS1",64}, {"WPT1",1} } }, + { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"WGS1",64}, {"WPT1",1} } }, { "Intel(R) HD Graphics Skylake ULT GT2", { {"WGS1",64}, {"WPT1",1} } }, { "Iris", { {"WGS1",64}, {"WPT1",2} } }, { "Iris Pro", { {"WGS1",256}, {"WPT1",2} } }, @@ -81,7 +83,7 @@ const Database::DatabaseEntry Database::XgemvSingle = { { "GeForce GTX TITAN X", { {"WGS1",256}, {"WPT1",1} } }, { "Tesla K20m", { {"WGS1",128}, {"WPT1",1} } }, { "Tesla K40m", { {"WGS1",256}, {"WPT1",1} } }, - { "default", { {"WGS1",64}, {"WPT1",1} } }, + { "default", { {"WGS1",256}, {"WPT1",1} } }, } }, { // Default @@ -116,6 +118,7 @@ const Database::DatabaseEntry Database::XgemvComplexSingle = { { // Intel GPUs kDeviceTypeGPU, "Intel", { { "Intel(R) HD Graphics 530", { {"WGS1",64}, {"WPT1",1} } }, + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"WGS1",64}, {"WPT1",1} } }, { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"WGS1",128}, {"WPT1",1} } }, { "Intel(R) HD Graphics Skylake ULT GT2", { {"WGS1",64}, {"WPT1",1} } }, { "Iris", { {"WGS1",256}, {"WPT1",1} } }, @@ -161,14 +164,14 @@ const Database::DatabaseEntry Database::XgemvDouble = { { "Oland", { {"WGS1",256}, {"WPT1",1} } }, { "Pitcairn", { {"WGS1",256}, {"WPT1",1} } }, { "Tahiti", { {"WGS1",256}, {"WPT1",1} } }, - { "default", { {"WGS1",64}, {"WPT1",1} } }, + { "default", { {"WGS1",256}, {"WPT1",1} } }, } }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"WGS1",64}, {"WPT1",2} } }, { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"WGS1",64}, {"WPT1",4} } }, - { "default", { {"WGS1",64}, {"WPT1",2} } }, + { "default", { {"WGS1",64}, {"WPT1",4} } }, } }, { // Intel accelerators @@ -191,12 +194,12 @@ const Database::DatabaseEntry Database::XgemvDouble = { { "GeForce GTX TITAN X", { {"WGS1",64}, {"WPT1",1} } }, { "Tesla K20m", { {"WGS1",256}, {"WPT1",1} } }, { "Tesla K40m", { {"WGS1",256}, {"WPT1",1} } }, - { "default", { {"WGS1",64}, {"WPT1",1} } }, + { "default", { {"WGS1",128}, {"WPT1",1} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"WGS1",64}, {"WPT1",1} } }, + { "default", { {"WGS1",128}, {"WPT1",1} } }, } }, } @@ -220,7 +223,7 @@ const Database::DatabaseEntry Database::XgemvComplexDouble = { kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"WGS1",64}, {"WPT1",1} } }, { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"WGS1",64}, {"WPT1",4} } }, - { "default", { {"WGS1",64}, {"WPT1",1} } }, + { "default", { {"WGS1",64}, {"WPT1",4} } }, } }, { // Intel accelerators @@ -234,7 +237,7 @@ const Database::DatabaseEntry Database::XgemvComplexDouble = { { "GRID K520", { {"WGS1",128}, {"WPT1",1} } }, { "GeForce GTX 480", { {"WGS1",64}, {"WPT1",1} } }, { "GeForce GTX 670", { {"WGS1",128}, {"WPT1",1} } }, - { "default", { {"WGS1",64}, {"WPT1",1} } }, + { "default", { {"WGS1",128}, {"WPT1",1} } }, } }, { // Default diff --git a/src/database/kernels/xgemv_fast.hpp b/src/database/kernels/xgemv_fast.hpp index 65b15030..52af628c 100644 --- a/src/database/kernels/xgemv_fast.hpp +++ b/src/database/kernels/xgemv_fast.hpp @@ -18,13 +18,14 @@ const Database::DatabaseEntry Database::XgemvFastHalf = { "XgemvFast", Precision::kHalf, { { // Intel GPUs kDeviceTypeGPU, "Intel", { + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"VW2",1}, {"WGS2",16}, {"WPT2",1} } }, { "Intel(R) HD Graphics Skylake ULT GT2", { {"VW2",2}, {"WGS2",128}, {"WPT2",2} } }, - { "default", { {"VW2",2}, {"WGS2",128}, {"WPT2",2} } }, + { "default", { {"VW2",1}, {"WGS2",16}, {"WPT2",1} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"VW2",2}, {"WGS2",128}, {"WPT2",2} } }, + { "default", { {"VW2",1}, {"WGS2",16}, {"WPT2",1} } }, } }, } @@ -48,17 +49,18 @@ const Database::DatabaseEntry Database::XgemvFastSingle = { kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"VW2",4}, {"WGS2",128}, {"WPT2",4} } }, { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"VW2",1}, {"WGS2",64}, {"WPT2",4} } }, - { "default", { {"VW2",1}, {"WGS2",64}, {"WPT2",4} } }, + { "default", { {"VW2",4}, {"WGS2",64}, {"WPT2",4} } }, } }, { // Intel GPUs kDeviceTypeGPU, "Intel", { { "Intel(R) HD Graphics 530", { {"VW2",1}, {"WGS2",256}, {"WPT2",1} } }, + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"VW2",2}, {"WGS2",32}, {"WPT2",2} } }, { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"VW2",4}, {"WGS2",128}, {"WPT2",4} } }, { "Intel(R) HD Graphics Skylake ULT GT2", { {"VW2",1}, {"WGS2",256}, {"WPT2",1} } }, { "Iris", { {"VW2",1}, {"WGS2",128}, {"WPT2",2} } }, { "Iris Pro", { {"VW2",1}, {"WGS2",128}, {"WPT2",2} } }, - { "default", { {"VW2",1}, {"WGS2",128}, {"WPT2",1} } }, + { "default", { {"VW2",2}, {"WGS2",64}, {"WPT2",2} } }, } }, { // Intel accelerators @@ -81,7 +83,7 @@ const Database::DatabaseEntry Database::XgemvFastSingle = { { "GeForce GTX TITAN X", { {"VW2",1}, {"WGS2",64}, {"WPT2",1} } }, { "Tesla K20m", { {"VW2",1}, {"WGS2",256}, {"WPT2",1} } }, { "Tesla K40m", { {"VW2",1}, {"WGS2",256}, {"WPT2",1} } }, - { "default", { {"VW2",1}, {"WGS2",64}, {"WPT2",1} } }, + { "default", { {"VW2",1}, {"WGS2",256}, {"WPT2",1} } }, } }, { // Default @@ -116,6 +118,7 @@ const Database::DatabaseEntry Database::XgemvFastComplexSingle = { { // Intel GPUs kDeviceTypeGPU, "Intel", { { "Intel(R) HD Graphics 530", { {"VW2",2}, {"WGS2",128}, {"WPT2",2} } }, + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"VW2",1}, {"WGS2",32}, {"WPT2",2} } }, { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"VW2",2}, {"WGS2",128}, {"WPT2",2} } }, { "Intel(R) HD Graphics Skylake ULT GT2", { {"VW2",1}, {"WGS2",64}, {"WPT2",1} } }, { "Iris", { {"VW2",1}, {"WGS2",64}, {"WPT2",1} } }, @@ -189,7 +192,7 @@ const Database::DatabaseEntry Database::XgemvFastDouble = { { "GeForce GTX TITAN X", { {"VW2",1}, {"WGS2",128}, {"WPT2",1} } }, { "Tesla K20m", { {"VW2",1}, {"WGS2",128}, {"WPT2",1} } }, { "Tesla K40m", { {"VW2",1}, {"WGS2",256}, {"WPT2",1} } }, - { "default", { {"VW2",1}, {"WGS2",64}, {"WPT2",1} } }, + { "default", { {"VW2",1}, {"WGS2",256}, {"WPT2",1} } }, } }, { // Default diff --git a/src/database/kernels/xgemv_fast_rot.hpp b/src/database/kernels/xgemv_fast_rot.hpp index 9822fb20..328094e1 100644 --- a/src/database/kernels/xgemv_fast_rot.hpp +++ b/src/database/kernels/xgemv_fast_rot.hpp @@ -14,6 +14,18 @@ namespace clblast { // ================================================================================================= +const Database::DatabaseEntry Database::XgemvFastRotHalf = { + "XgemvFastRot", Precision::kHalf, { + { // Default + kDeviceTypeAll, "default", { + { "default", { {"VW3",8}, {"WGS3",32}, {"WPT3",32} } }, + } + }, + } +}; + +// ================================================================================================= + const Database::DatabaseEntry Database::XgemvFastRotSingle = { "XgemvFastRot", Precision::kSingle, { { // AMD GPUs @@ -30,9 +42,11 @@ const Database::DatabaseEntry Database::XgemvFastRotSingle = { }, { // Intel GPUs kDeviceTypeGPU, "Intel", { + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"VW3",8}, {"WGS3",64}, {"WPT3",32} } }, + { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"VW3",4}, {"WGS3",64}, {"WPT3",16} } }, { "Intel(R) HD Graphics Skylake ULT GT2", { {"VW3",4}, {"WGS3",128}, {"WPT3",16} } }, { "Iris Pro", { {"VW3",4}, {"WGS3",32}, {"WPT3",16} } }, - { "default", { {"VW3",4}, {"WGS3",32}, {"WPT3",16} } }, + { "default", { {"VW3",8}, {"WGS3",32}, {"WPT3",32} } }, } }, { // NVIDIA GPUs @@ -43,7 +57,7 @@ const Database::DatabaseEntry Database::XgemvFastRotSingle = { }, { // Default kDeviceTypeAll, "default", { - { "default", { {"VW3",1}, {"WGS3",16}, {"WPT3",8} } }, + { "default", { {"VW3",8}, {"WGS3",32}, {"WPT3",32} } }, } }, } @@ -67,14 +81,16 @@ const Database::DatabaseEntry Database::XgemvFastRotComplexSingle = { }, { // Intel GPUs kDeviceTypeGPU, "Intel", { + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"VW3",2}, {"WGS3",16}, {"WPT3",16} } }, + { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"VW3",4}, {"WGS3",128}, {"WPT3",8} } }, { "Intel(R) HD Graphics Skylake ULT GT2", { {"VW3",2}, {"WGS3",32}, {"WPT3",16} } }, { "Iris Pro", { {"VW3",4}, {"WGS3",16}, {"WPT3",16} } }, - { "default", { {"VW3",2}, {"WGS3",16}, {"WPT3",16} } }, + { "default", { {"VW3",2}, {"WGS3",32}, {"WPT3",8} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"VW3",2}, {"WGS3",16}, {"WPT3",16} } }, + { "default", { {"VW3",2}, {"WGS3",32}, {"WPT3",16} } }, } }, } @@ -104,7 +120,7 @@ const Database::DatabaseEntry Database::XgemvFastRotDouble = { }, { // Default kDeviceTypeAll, "default", { - { "default", { {"VW3",1}, {"WGS3",16}, {"WPT3",8} } }, + { "default", { {"VW3",4}, {"WGS3",16}, {"WPT3",16} } }, } }, } @@ -128,7 +144,7 @@ const Database::DatabaseEntry Database::XgemvFastRotComplexDouble = { }, { // Default kDeviceTypeAll, "default", { - { "default", { {"VW3",4}, {"WGS3",16}, {"WPT3",16} } }, + { "default", { {"VW3",8}, {"WGS3",32}, {"WPT3",16} } }, } }, } diff --git a/src/database/kernels/xger.hpp b/src/database/kernels/xger.hpp index 216925fc..3e9c25c1 100644 --- a/src/database/kernels/xger.hpp +++ b/src/database/kernels/xger.hpp @@ -18,6 +18,7 @@ const Database::DatabaseEntry Database::XgerHalf = { "Xger", Precision::kHalf, { { // Intel GPUs kDeviceTypeGPU, "Intel", { + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"WGS1",256}, {"WGS2",1}, {"WPT",2} } }, { "Intel(R) HD Graphics Skylake ULT GT2", { {"WGS1",64}, {"WGS2",1}, {"WPT",1} } }, { "default", { {"WGS1",64}, {"WGS2",1}, {"WPT",1} } }, } @@ -41,7 +42,7 @@ const Database::DatabaseEntry Database::XgerSingle = { { "Oland", { {"WGS1",32}, {"WGS2",4}, {"WPT",2} } }, { "Pitcairn", { {"WGS1",64}, {"WGS2",1}, {"WPT",1} } }, { "Tahiti", { {"WGS1",256}, {"WGS2",1}, {"WPT",1} } }, - { "default", { {"WGS1",32}, {"WGS2",1}, {"WPT",1} } }, + { "default", { {"WGS1",32}, {"WGS2",4}, {"WPT",1} } }, } }, { // ARM GPUs @@ -54,16 +55,17 @@ const Database::DatabaseEntry Database::XgerSingle = { kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"WGS1",128}, {"WGS2",2}, {"WPT",4} } }, { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"WGS1",128}, {"WGS2",1}, {"WPT",4} } }, - { "default", { {"WGS1",128}, {"WGS2",1}, {"WPT",4} } }, + { "default", { {"WGS1",128}, {"WGS2",8}, {"WPT",4} } }, } }, { // Intel GPUs kDeviceTypeGPU, "Intel", { { "Intel(R) HD Graphics 530", { {"WGS1",32}, {"WGS2",1}, {"WPT",2} } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"WGS1",256}, {"WGS2",2}, {"WPT",2} } }, + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"WGS1",256}, {"WGS2",2}, {"WPT",2} } }, + { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"WGS1",128}, {"WGS2",1}, {"WPT",2} } }, { "Intel(R) HD Graphics Skylake ULT GT2", { {"WGS1",8}, {"WGS2",8}, {"WPT",4} } }, { "Iris Pro", { {"WGS1",64}, {"WGS2",1}, {"WPT",4} } }, - { "default", { {"WGS1",8}, {"WGS2",1}, {"WPT",2} } }, + { "default", { {"WGS1",32}, {"WGS2",4}, {"WPT",2} } }, } }, { // NVIDIA GPUs @@ -75,12 +77,12 @@ const Database::DatabaseEntry Database::XgerSingle = { { "GeForce GTX 680", { {"WGS1",128}, {"WGS2",1}, {"WPT",4} } }, { "GeForce GTX 750", { {"WGS1",64}, {"WGS2",16}, {"WPT",4} } }, { "GeForce GTX TITAN", { {"WGS1",32}, {"WGS2",4}, {"WPT",2} } }, - { "default", { {"WGS1",32}, {"WGS2",1}, {"WPT",1} } }, + { "default", { {"WGS1",256}, {"WGS2",1}, {"WPT",4} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"WGS1",8}, {"WGS2",1}, {"WPT",1} } }, + { "default", { {"WGS1",32}, {"WGS2",4}, {"WPT",2} } }, } }, } @@ -97,7 +99,7 @@ const Database::DatabaseEntry Database::XgerComplexSingle = { { "Oland", { {"WGS1",4}, {"WGS2",8}, {"WPT",1} } }, { "Pitcairn", { {"WGS1",128}, {"WGS2",2}, {"WPT",1} } }, { "Tahiti", { {"WGS1",64}, {"WGS2",2}, {"WPT",1} } }, - { "default", { {"WGS1",4}, {"WGS2",1}, {"WPT",1} } }, + { "default", { {"WGS1",256}, {"WGS2",1}, {"WPT",1} } }, } }, { // ARM GPUs @@ -110,16 +112,17 @@ const Database::DatabaseEntry Database::XgerComplexSingle = { kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"WGS1",256}, {"WGS2",1}, {"WPT",4} } }, { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"WGS1",512}, {"WGS2",4}, {"WPT",2} } }, - { "default", { {"WGS1",256}, {"WGS2",1}, {"WPT",2} } }, + { "default", { {"WGS1",512}, {"WGS2",4}, {"WPT",2} } }, } }, { // Intel GPUs kDeviceTypeGPU, "Intel", { { "Intel(R) HD Graphics 530", { {"WGS1",32}, {"WGS2",1}, {"WPT",2} } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"WGS1",128}, {"WGS2",4}, {"WPT",1} } }, + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"WGS1",128}, {"WGS2",2}, {"WPT",1} } }, + { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"WGS1",512}, {"WGS2",1}, {"WPT",1} } }, { "Intel(R) HD Graphics Skylake ULT GT2", { {"WGS1",128}, {"WGS2",4}, {"WPT",2} } }, { "Iris Pro", { {"WGS1",16}, {"WGS2",2}, {"WPT",4} } }, - { "default", { {"WGS1",16}, {"WGS2",1}, {"WPT",1} } }, + { "default", { {"WGS1",64}, {"WGS2",1}, {"WPT",2} } }, } }, { // NVIDIA GPUs @@ -131,12 +134,12 @@ const Database::DatabaseEntry Database::XgerComplexSingle = { { "GeForce GTX 680", { {"WGS1",32}, {"WGS2",4}, {"WPT",2} } }, { "GeForce GTX 750", { {"WGS1",32}, {"WGS2",16}, {"WPT",4} } }, { "GeForce GTX TITAN", { {"WGS1",16}, {"WGS2",16}, {"WPT",2} } }, - { "default", { {"WGS1",16}, {"WGS2",2}, {"WPT",2} } }, + { "default", { {"WGS1",64}, {"WGS2",2}, {"WPT",2} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"WGS1",4}, {"WGS2",1}, {"WPT",1} } }, + { "default", { {"WGS1",64}, {"WGS2",4}, {"WPT",2} } }, } }, } @@ -153,7 +156,7 @@ const Database::DatabaseEntry Database::XgerDouble = { { "Oland", { {"WGS1",128}, {"WGS2",1}, {"WPT",2} } }, { "Pitcairn", { {"WGS1",64}, {"WGS2",1}, {"WPT",1} } }, { "Tahiti", { {"WGS1",64}, {"WGS2",2}, {"WPT",1} } }, - { "default", { {"WGS1",32}, {"WGS2",1}, {"WPT",1} } }, + { "default", { {"WGS1",64}, {"WGS2",2}, {"WPT",1} } }, } }, { // ARM GPUs @@ -166,7 +169,7 @@ const Database::DatabaseEntry Database::XgerDouble = { kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"WGS1",512}, {"WGS2",16}, {"WPT",1} } }, { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"WGS1",512}, {"WGS2",8}, {"WPT",2} } }, - { "default", { {"WGS1",512}, {"WGS2",8}, {"WPT",1} } }, + { "default", { {"WGS1",512}, {"WGS2",8}, {"WPT",2} } }, } }, { // NVIDIA GPUs @@ -178,12 +181,12 @@ const Database::DatabaseEntry Database::XgerDouble = { { "GeForce GTX 680", { {"WGS1",128}, {"WGS2",4}, {"WPT",2} } }, { "GeForce GTX 750", { {"WGS1",256}, {"WGS2",2}, {"WPT",2} } }, { "GeForce GTX TITAN", { {"WGS1",16}, {"WGS2",8}, {"WPT",2} } }, - { "default", { {"WGS1",16}, {"WGS2",2}, {"WPT",1} } }, + { "default", { {"WGS1",256}, {"WGS2",2}, {"WPT",2} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"WGS1",16}, {"WGS2",1}, {"WPT",1} } }, + { "default", { {"WGS1",128}, {"WGS2",1}, {"WPT",2} } }, } }, } @@ -200,7 +203,7 @@ const Database::DatabaseEntry Database::XgerComplexDouble = { { "Oland", { {"WGS1",16}, {"WGS2",16}, {"WPT",2} } }, { "Pitcairn", { {"WGS1",64}, {"WGS2",4}, {"WPT",1} } }, { "Tahiti", { {"WGS1",32}, {"WGS2",4}, {"WPT",1} } }, - { "default", { {"WGS1",16}, {"WGS2",1}, {"WPT",1} } }, + { "default", { {"WGS1",32}, {"WGS2",4}, {"WPT",1} } }, } }, { // ARM GPUs @@ -225,12 +228,12 @@ const Database::DatabaseEntry Database::XgerComplexDouble = { { "GeForce GTX 680", { {"WGS1",8}, {"WGS2",16}, {"WPT",1} } }, { "GeForce GTX 750", { {"WGS1",8}, {"WGS2",32}, {"WPT",4} } }, { "GeForce GTX TITAN", { {"WGS1",32}, {"WGS2",4}, {"WPT",2} } }, - { "default", { {"WGS1",8}, {"WGS2",2}, {"WPT",1} } }, + { "default", { {"WGS1",16}, {"WGS2",8}, {"WPT",2} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"WGS1",8}, {"WGS2",1}, {"WPT",1} } }, + { "default", { {"WGS1",64}, {"WGS2",2}, {"WPT",2} } }, } }, } |