diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2016-07-25 22:57:23 +0200 |
---|---|---|
committer | Cedric Nugteren <web@cedricnugteren.nl> | 2016-07-25 22:57:23 +0200 |
commit | de1afe168d8da92d49d0239d8b5ff4385ae37326 (patch) | |
tree | 6da7de8372220f38a4c818b36d154b4624400859 | |
parent | 2582f0290a396305ee3b86fb544e999fd55fe323 (diff) |
Removed all old tuning results for the XgemvFastRot kernel; re-added for a couple of devices
-rw-r--r-- | src/database/database.cpp | 2 | ||||
-rw-r--r-- | src/database/database.hpp | 2 | ||||
-rw-r--r-- | src/database/kernels/xgemv_fast_rot.hpp | 167 |
3 files changed, 32 insertions, 139 deletions
diff --git a/src/database/database.cpp b/src/database/database.cpp index 28124455..38974b95 100644 --- a/src/database/database.cpp +++ b/src/database/database.cpp @@ -35,7 +35,7 @@ const std::vector<Database::DatabaseEntry> Database::database = { XdotHalf, XdotSingle, XdotDouble, XdotComplexSingle, XdotComplexDouble, XgemvHalf, XgemvSingle, XgemvDouble, XgemvComplexSingle, XgemvComplexDouble, XgemvFastHalf, XgemvFastSingle, XgemvFastDouble, XgemvFastComplexSingle, XgemvFastComplexDouble, - XgemvFastRotHalf, XgemvFastRotSingle, XgemvFastRotDouble, XgemvFastRotComplexSingle, XgemvFastRotComplexDouble, + /* XgemvFastRotHalf, */ XgemvFastRotSingle, XgemvFastRotDouble, XgemvFastRotComplexSingle, XgemvFastRotComplexDouble, XgerHalf, XgerSingle, XgerDouble, XgerComplexSingle, XgerComplexDouble, /* XgemmHalf, */ XgemmSingle, XgemmDouble, XgemmComplexSingle, XgemmComplexDouble, CopyHalf, CopySingle, CopyDouble, CopyComplexSingle, CopyComplexDouble, diff --git a/src/database/database.hpp b/src/database/database.hpp index 2fd96411..8d6d3863 100644 --- a/src/database/database.hpp +++ b/src/database/database.hpp @@ -72,7 +72,7 @@ class Database { static const DatabaseEntry XdotHalf, XdotSingle, XdotDouble, XdotComplexSingle, XdotComplexDouble; static const DatabaseEntry XgemvHalf, XgemvSingle, XgemvDouble, XgemvComplexSingle, XgemvComplexDouble; static const DatabaseEntry XgemvFastHalf, XgemvFastSingle, XgemvFastDouble, XgemvFastComplexSingle, XgemvFastComplexDouble; - static const DatabaseEntry XgemvFastRotHalf, XgemvFastRotSingle, XgemvFastRotDouble, XgemvFastRotComplexSingle, XgemvFastRotComplexDouble; + static const DatabaseEntry /* XgemvFastRotHalf, */ XgemvFastRotSingle, XgemvFastRotDouble, XgemvFastRotComplexSingle, XgemvFastRotComplexDouble; static const DatabaseEntry XgerHalf, XgerSingle, XgerDouble, XgerComplexSingle, XgerComplexDouble; static const DatabaseEntry /* XgemmHalf, */ XgemmSingle, XgemmDouble, XgemmComplexSingle, XgemmComplexDouble; static const DatabaseEntry CopyHalf, CopySingle, CopyDouble, CopyComplexSingle, CopyComplexDouble; diff --git a/src/database/kernels/xgemv_fast_rot.hpp b/src/database/kernels/xgemv_fast_rot.hpp index ee866e26..9822fb20 100644 --- a/src/database/kernels/xgemv_fast_rot.hpp +++ b/src/database/kernels/xgemv_fast_rot.hpp @@ -14,79 +14,36 @@ namespace clblast { // ================================================================================================= -const Database::DatabaseEntry Database::XgemvFastRotHalf = { - "XgemvFastRot", Precision::kHalf, { - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics Skylake ULT GT2", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, - { "default", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, - } - }, - } -}; - -// ================================================================================================= - const Database::DatabaseEntry Database::XgemvFastRotSingle = { "XgemvFastRot", Precision::kSingle, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, - { "Hawaii", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, - { "Oland", { {"VW3",4}, {"WGS3",256}, {"WPT3",4} } }, - { "Pitcairn", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, - { "Tahiti", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, - { "default", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, + { "AMD Radeon R9 M370X Compute Engine", { {"VW3",8}, {"WGS3",64}, {"WPT3",32} } }, + { "default", { {"VW3",8}, {"WGS3",64}, {"WPT3",32} } }, } }, { // Intel CPUs kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"VW3",2}, {"WGS3",64}, {"WPT3",4} } }, - { "default", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"VW3",8}, {"WGS3",16}, {"WPT3",8} } }, + { "default", { {"VW3",8}, {"WGS3",16}, {"WPT3",8} } }, } }, { // Intel GPUs kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 530", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"VW3",4}, {"WGS3",256}, {"WPT3",4} } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { {"VW3",4}, {"WGS3",64}, {"WPT3",4} } }, - { "Iris", { {"VW3",4}, {"WGS3",64}, {"WPT3",8} } }, - { "Iris Pro", { {"VW3",4}, {"WGS3",64}, {"WPT3",4} } }, - { "default", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, - } - }, - { // Intel accelerators - kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, - { "default", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { {"VW3",4}, {"WGS3",128}, {"WPT3",16} } }, + { "Iris Pro", { {"VW3",4}, {"WGS3",32}, {"WPT3",16} } }, + { "default", { {"VW3",4}, {"WGS3",32}, {"WPT3",16} } }, } }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { {"VW3",2}, {"WGS3",64}, {"WPT3",2} } }, - { "GeForce GTX 1070", { {"VW3",2}, {"WGS3",64}, {"WPT3",2} } }, - { "GeForce GTX 480", { {"VW3",2}, {"WGS3",64}, {"WPT3",2} } }, - { "GeForce GTX 670", { {"VW3",2}, {"WGS3",64}, {"WPT3",2} } }, - { "GeForce GTX 680", { {"VW3",2}, {"WGS3",128}, {"WPT3",2} } }, - { "GeForce GTX 750", { {"VW3",2}, {"WGS3",128}, {"WPT3",2} } }, - { "GeForce GTX 750 Ti", { {"VW3",4}, {"WGS3",128}, {"WPT3",4} } }, - { "GeForce GTX 980", { {"VW3",1}, {"WGS3",128}, {"WPT3",1} } }, - { "GeForce GTX TITAN", { {"VW3",1}, {"WGS3",256}, {"WPT3",1} } }, - { "GeForce GTX TITAN X", { {"VW3",1}, {"WGS3",128}, {"WPT3",1} } }, - { "Tesla K20m", { {"VW3",1}, {"WGS3",256}, {"WPT3",1} } }, - { "Tesla K40m", { {"VW3",1}, {"WGS3",128}, {"WPT3",1} } }, - { "default", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, + { "GeForce GTX TITAN", { {"VW3",1}, {"WGS3",16}, {"WPT3",16} } }, + { "default", { {"VW3",1}, {"WGS3",16}, {"WPT3",16} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, + { "default", { {"VW3",1}, {"WGS3",16}, {"WPT3",8} } }, } }, } @@ -98,49 +55,26 @@ const Database::DatabaseEntry Database::XgemvFastRotComplexSingle = { "XgemvFastRot", Precision::kComplexSingle, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { {"VW3",1}, {"WGS3",128}, {"WPT3",1} } }, - { "Hawaii", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, - { "Oland", { {"VW3",4}, {"WGS3",64}, {"WPT3",4} } }, - { "Pitcairn", { {"VW3",2}, {"WGS3",64}, {"WPT3",2} } }, - { "Tahiti", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, - { "default", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, + { "AMD Radeon R9 M370X Compute Engine", { {"VW3",8}, {"WGS3",16}, {"WPT3",16} } }, + { "default", { {"VW3",8}, {"WGS3",16}, {"WPT3",16} } }, } }, { // Intel CPUs kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"VW3",2}, {"WGS3",64}, {"WPT3",2} } }, - { "default", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"VW3",4}, {"WGS3",16}, {"WPT3",16} } }, + { "default", { {"VW3",4}, {"WGS3",16}, {"WPT3",16} } }, } }, { // Intel GPUs kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 530", { {"VW3",4}, {"WGS3",64}, {"WPT3",4} } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"VW3",4}, {"WGS3",128}, {"WPT3",4} } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { {"VW3",4}, {"WGS3",64}, {"WPT3",4} } }, - { "Iris", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, - { "Iris Pro", { {"VW3",2}, {"WGS3",64}, {"WPT3",2} } }, - { "default", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, - } - }, - { // Intel accelerators - kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, - { "default", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, - { "GeForce GTX 480", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, - { "GeForce GTX 670", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, - { "GeForce GTX 680", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, - { "default", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { {"VW3",2}, {"WGS3",32}, {"WPT3",16} } }, + { "Iris Pro", { {"VW3",4}, {"WGS3",16}, {"WPT3",16} } }, + { "default", { {"VW3",2}, {"WGS3",16}, {"WPT3",16} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, + { "default", { {"VW3",2}, {"WGS3",16}, {"WPT3",16} } }, } }, } @@ -152,47 +86,25 @@ const Database::DatabaseEntry Database::XgemvFastRotDouble = { "XgemvFastRot", Precision::kDouble, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { {"VW3",1}, {"WGS3",128}, {"WPT3",1} } }, - { "Hawaii", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, - { "Oland", { {"VW3",4}, {"WGS3",256}, {"WPT3",4} } }, - { "Pitcairn", { {"VW3",2}, {"WGS3",64}, {"WPT3",2} } }, - { "Tahiti", { {"VW3",2}, {"WGS3",64}, {"WPT3",2} } }, - { "default", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, + { "AMD Radeon R9 M370X Compute Engine", { {"VW3",4}, {"WGS3",16}, {"WPT3",16} } }, + { "default", { {"VW3",4}, {"WGS3",16}, {"WPT3",16} } }, } }, { // Intel CPUs kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"VW3",1}, {"WGS3",64}, {"WPT3",2} } }, - { "default", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, - } - }, - { // Intel accelerators - kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, - { "default", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"VW3",8}, {"WGS3",16}, {"WPT3",8} } }, + { "default", { {"VW3",8}, {"WGS3",16}, {"WPT3",8} } }, } }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, - { "GeForce GTX 1070", { {"VW3",1}, {"WGS3",128}, {"WPT3",1} } }, - { "GeForce GTX 480", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, - { "GeForce GTX 670", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, - { "GeForce GTX 680", { {"VW3",2}, {"WGS3",128}, {"WPT3",2} } }, - { "GeForce GTX 750", { {"VW3",2}, {"WGS3",64}, {"WPT3",2} } }, - { "GeForce GTX 750 Ti", { {"VW3",2}, {"WGS3",256}, {"WPT3",2} } }, - { "GeForce GTX 980", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, - { "GeForce GTX TITAN", { {"VW3",1}, {"WGS3",256}, {"WPT3",1} } }, - { "GeForce GTX TITAN X", { {"VW3",1}, {"WGS3",128}, {"WPT3",1} } }, - { "Tesla K20m", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, - { "Tesla K40m", { {"VW3",1}, {"WGS3",256}, {"WPT3",1} } }, - { "default", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, + { "GeForce GTX TITAN", { {"VW3",1}, {"WGS3",16}, {"WPT3",16} } }, + { "default", { {"VW3",1}, {"WGS3",16}, {"WPT3",16} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, + { "default", { {"VW3",1}, {"WGS3",16}, {"WPT3",8} } }, } }, } @@ -204,38 +116,19 @@ const Database::DatabaseEntry Database::XgemvFastRotComplexDouble = { "XgemvFastRot", Precision::kComplexDouble, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { {"VW3",1}, {"WGS3",128}, {"WPT3",1} } }, - { "Hawaii", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, - { "Oland", { {"VW3",1}, {"WGS3",256}, {"WPT3",1} } }, - { "Pitcairn", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, - { "Tahiti", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, - { "default", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, + { "AMD Radeon R9 M370X Compute Engine", { {"VW3",4}, {"WGS3",32}, {"WPT3",16} } }, + { "default", { {"VW3",4}, {"WGS3",32}, {"WPT3",16} } }, } }, { // Intel CPUs kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"VW3",1}, {"WGS3",256}, {"WPT3",1} } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"VW3",2}, {"WGS3",256}, {"WPT3",2} } }, - { "default", { {"VW3",1}, {"WGS3",256}, {"WPT3",1} } }, - } - }, - { // Intel accelerators - kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, - { "default", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { {"VW3",1}, {"WGS3",256}, {"WPT3",1} } }, - { "GeForce GTX 480", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, - { "GeForce GTX 670", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, - { "default", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"VW3",8}, {"WGS3",16}, {"WPT3",16} } }, + { "default", { {"VW3",8}, {"WGS3",16}, {"WPT3",16} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, + { "default", { {"VW3",4}, {"WGS3",16}, {"WPT3",16} } }, } }, } |