summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Cedric Nugteren <web@cedricnugteren.nl> 2016-07-25 22:57:23 +0200
committer Cedric Nugteren <web@cedricnugteren.nl> 2016-07-25 22:57:23 +0200
commit de1afe168d8da92d49d0239d8b5ff4385ae37326 (patch)
tree 6da7de8372220f38a4c818b36d154b4624400859
parent 2582f0290a396305ee3b86fb544e999fd55fe323 (diff)
Removed all old tuning results for the XgemvFastRot kernel; re-added for a couple of devices
-rw-r--r-- src/database/database.cpp 2
-rw-r--r-- src/database/database.hpp 2
-rw-r--r-- src/database/kernels/xgemv_fast_rot.hpp 167
3 files changed, 32 insertions, 139 deletions
diff --git a/src/database/database.cpp b/src/database/database.cpp
index 28124455..38974b95 100644
--- a/src/database/database.cpp
+++ b/src/database/database.cpp
@@ -35,7 +35,7 @@ const std::vector<Database::DatabaseEntry> Database::database = {
XdotHalf, XdotSingle, XdotDouble, XdotComplexSingle, XdotComplexDouble,
XgemvHalf, XgemvSingle, XgemvDouble, XgemvComplexSingle, XgemvComplexDouble,
XgemvFastHalf, XgemvFastSingle, XgemvFastDouble, XgemvFastComplexSingle, XgemvFastComplexDouble,
- XgemvFastRotHalf, XgemvFastRotSingle, XgemvFastRotDouble, XgemvFastRotComplexSingle, XgemvFastRotComplexDouble,
+ /* XgemvFastRotHalf, */ XgemvFastRotSingle, XgemvFastRotDouble, XgemvFastRotComplexSingle, XgemvFastRotComplexDouble,
XgerHalf, XgerSingle, XgerDouble, XgerComplexSingle, XgerComplexDouble,
/* XgemmHalf, */ XgemmSingle, XgemmDouble, XgemmComplexSingle, XgemmComplexDouble,
CopyHalf, CopySingle, CopyDouble, CopyComplexSingle, CopyComplexDouble,
diff --git a/src/database/database.hpp b/src/database/database.hpp
index 2fd96411..8d6d3863 100644
--- a/src/database/database.hpp
+++ b/src/database/database.hpp
@@ -72,7 +72,7 @@ class Database {
static const DatabaseEntry XdotHalf, XdotSingle, XdotDouble, XdotComplexSingle, XdotComplexDouble;
static const DatabaseEntry XgemvHalf, XgemvSingle, XgemvDouble, XgemvComplexSingle, XgemvComplexDouble;
static const DatabaseEntry XgemvFastHalf, XgemvFastSingle, XgemvFastDouble, XgemvFastComplexSingle, XgemvFastComplexDouble;
- static const DatabaseEntry XgemvFastRotHalf, XgemvFastRotSingle, XgemvFastRotDouble, XgemvFastRotComplexSingle, XgemvFastRotComplexDouble;
+ static const DatabaseEntry /* XgemvFastRotHalf, */ XgemvFastRotSingle, XgemvFastRotDouble, XgemvFastRotComplexSingle, XgemvFastRotComplexDouble;
static const DatabaseEntry XgerHalf, XgerSingle, XgerDouble, XgerComplexSingle, XgerComplexDouble;
static const DatabaseEntry /* XgemmHalf, */ XgemmSingle, XgemmDouble, XgemmComplexSingle, XgemmComplexDouble;
static const DatabaseEntry CopyHalf, CopySingle, CopyDouble, CopyComplexSingle, CopyComplexDouble;
diff --git a/src/database/kernels/xgemv_fast_rot.hpp b/src/database/kernels/xgemv_fast_rot.hpp
index ee866e26..9822fb20 100644
--- a/src/database/kernels/xgemv_fast_rot.hpp
+++ b/src/database/kernels/xgemv_fast_rot.hpp
@@ -14,79 +14,36 @@
namespace clblast {
// =================================================================================================
-const Database::DatabaseEntry Database::XgemvFastRotHalf = {
- "XgemvFastRot", Precision::kHalf, {
- { // Intel GPUs
- kDeviceTypeGPU, "Intel", {
- { "Intel(R) HD Graphics Skylake ULT GT2", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
- { "default", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
- }
- },
- }
-};
-
-// =================================================================================================
-
const Database::DatabaseEntry Database::XgemvFastRotSingle = {
"XgemvFastRot", Precision::kSingle, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
- { "AMD Radeon R9 M370X Compute Engine", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
- { "Hawaii", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
- { "Oland", { {"VW3",4}, {"WGS3",256}, {"WPT3",4} } },
- { "Pitcairn", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
- { "Tahiti", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
- { "default", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
+ { "AMD Radeon R9 M370X Compute Engine", { {"VW3",8}, {"WGS3",64}, {"WPT3",32} } },
+ { "default", { {"VW3",8}, {"WGS3",64}, {"WPT3",32} } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
- { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
- { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"VW3",2}, {"WGS3",64}, {"WPT3",4} } },
- { "default", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
+ { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"VW3",8}, {"WGS3",16}, {"WPT3",8} } },
+ { "default", { {"VW3",8}, {"WGS3",16}, {"WPT3",8} } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
- { "Intel(R) HD Graphics 530", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
- { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"VW3",4}, {"WGS3",256}, {"WPT3",4} } },
- { "Intel(R) HD Graphics Skylake ULT GT2", { {"VW3",4}, {"WGS3",64}, {"WPT3",4} } },
- { "Iris", { {"VW3",4}, {"WGS3",64}, {"WPT3",8} } },
- { "Iris Pro", { {"VW3",4}, {"WGS3",64}, {"WPT3",4} } },
- { "default", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
- }
- },
- { // Intel accelerators
- kDeviceTypeAccelerator, "Intel", {
- { "Intel(R) Many Integrated Core Acceleration Card", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
- { "default", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
+ { "Intel(R) HD Graphics Skylake ULT GT2", { {"VW3",4}, {"WGS3",128}, {"WPT3",16} } },
+ { "Iris Pro", { {"VW3",4}, {"WGS3",32}, {"WPT3",16} } },
+ { "default", { {"VW3",4}, {"WGS3",32}, {"WPT3",16} } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
- { "GRID K520", { {"VW3",2}, {"WGS3",64}, {"WPT3",2} } },
- { "GeForce GTX 1070", { {"VW3",2}, {"WGS3",64}, {"WPT3",2} } },
- { "GeForce GTX 480", { {"VW3",2}, {"WGS3",64}, {"WPT3",2} } },
- { "GeForce GTX 670", { {"VW3",2}, {"WGS3",64}, {"WPT3",2} } },
- { "GeForce GTX 680", { {"VW3",2}, {"WGS3",128}, {"WPT3",2} } },
- { "GeForce GTX 750", { {"VW3",2}, {"WGS3",128}, {"WPT3",2} } },
- { "GeForce GTX 750 Ti", { {"VW3",4}, {"WGS3",128}, {"WPT3",4} } },
- { "GeForce GTX 980", { {"VW3",1}, {"WGS3",128}, {"WPT3",1} } },
- { "GeForce GTX TITAN", { {"VW3",1}, {"WGS3",256}, {"WPT3",1} } },
- { "GeForce GTX TITAN X", { {"VW3",1}, {"WGS3",128}, {"WPT3",1} } },
- { "Tesla K20m", { {"VW3",1}, {"WGS3",256}, {"WPT3",1} } },
- { "Tesla K40m", { {"VW3",1}, {"WGS3",128}, {"WPT3",1} } },
- { "default", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
+ { "GeForce GTX TITAN", { {"VW3",1}, {"WGS3",16}, {"WPT3",16} } },
+ { "default", { {"VW3",1}, {"WGS3",16}, {"WPT3",16} } },
}
},
{ // Default
kDeviceTypeAll, "default", {
- { "default", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
+ { "default", { {"VW3",1}, {"WGS3",16}, {"WPT3",8} } },
}
},
}
@@ -98,49 +55,26 @@ const Database::DatabaseEntry Database::XgemvFastRotComplexSingle = {
"XgemvFastRot", Precision::kComplexSingle, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
- { "AMD Radeon R9 M370X Compute Engine", { {"VW3",1}, {"WGS3",128}, {"WPT3",1} } },
- { "Hawaii", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
- { "Oland", { {"VW3",4}, {"WGS3",64}, {"WPT3",4} } },
- { "Pitcairn", { {"VW3",2}, {"WGS3",64}, {"WPT3",2} } },
- { "Tahiti", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
- { "default", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
+ { "AMD Radeon R9 M370X Compute Engine", { {"VW3",8}, {"WGS3",16}, {"WPT3",16} } },
+ { "default", { {"VW3",8}, {"WGS3",16}, {"WPT3",16} } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
- { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
- { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"VW3",2}, {"WGS3",64}, {"WPT3",2} } },
- { "default", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
+ { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"VW3",4}, {"WGS3",16}, {"WPT3",16} } },
+ { "default", { {"VW3",4}, {"WGS3",16}, {"WPT3",16} } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
- { "Intel(R) HD Graphics 530", { {"VW3",4}, {"WGS3",64}, {"WPT3",4} } },
- { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"VW3",4}, {"WGS3",128}, {"WPT3",4} } },
- { "Intel(R) HD Graphics Skylake ULT GT2", { {"VW3",4}, {"WGS3",64}, {"WPT3",4} } },
- { "Iris", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
- { "Iris Pro", { {"VW3",2}, {"WGS3",64}, {"WPT3",2} } },
- { "default", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
- }
- },
- { // Intel accelerators
- kDeviceTypeAccelerator, "Intel", {
- { "Intel(R) Many Integrated Core Acceleration Card", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
- { "default", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
- }
- },
- { // NVIDIA GPUs
- kDeviceTypeGPU, "NVIDIA", {
- { "GRID K520", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
- { "GeForce GTX 480", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
- { "GeForce GTX 670", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
- { "GeForce GTX 680", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
- { "default", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
+ { "Intel(R) HD Graphics Skylake ULT GT2", { {"VW3",2}, {"WGS3",32}, {"WPT3",16} } },
+ { "Iris Pro", { {"VW3",4}, {"WGS3",16}, {"WPT3",16} } },
+ { "default", { {"VW3",2}, {"WGS3",16}, {"WPT3",16} } },
}
},
{ // Default
kDeviceTypeAll, "default", {
- { "default", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
+ { "default", { {"VW3",2}, {"WGS3",16}, {"WPT3",16} } },
}
},
}
@@ -152,47 +86,25 @@ const Database::DatabaseEntry Database::XgemvFastRotDouble = {
"XgemvFastRot", Precision::kDouble, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
- { "AMD Radeon R9 M370X Compute Engine", { {"VW3",1}, {"WGS3",128}, {"WPT3",1} } },
- { "Hawaii", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
- { "Oland", { {"VW3",4}, {"WGS3",256}, {"WPT3",4} } },
- { "Pitcairn", { {"VW3",2}, {"WGS3",64}, {"WPT3",2} } },
- { "Tahiti", { {"VW3",2}, {"WGS3",64}, {"WPT3",2} } },
- { "default", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
+ { "AMD Radeon R9 M370X Compute Engine", { {"VW3",4}, {"WGS3",16}, {"WPT3",16} } },
+ { "default", { {"VW3",4}, {"WGS3",16}, {"WPT3",16} } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
- { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
- { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"VW3",1}, {"WGS3",64}, {"WPT3",2} } },
- { "default", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
- }
- },
- { // Intel accelerators
- kDeviceTypeAccelerator, "Intel", {
- { "Intel(R) Many Integrated Core Acceleration Card", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
- { "default", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
+ { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"VW3",8}, {"WGS3",16}, {"WPT3",8} } },
+ { "default", { {"VW3",8}, {"WGS3",16}, {"WPT3",8} } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
- { "GRID K520", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
- { "GeForce GTX 1070", { {"VW3",1}, {"WGS3",128}, {"WPT3",1} } },
- { "GeForce GTX 480", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
- { "GeForce GTX 670", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
- { "GeForce GTX 680", { {"VW3",2}, {"WGS3",128}, {"WPT3",2} } },
- { "GeForce GTX 750", { {"VW3",2}, {"WGS3",64}, {"WPT3",2} } },
- { "GeForce GTX 750 Ti", { {"VW3",2}, {"WGS3",256}, {"WPT3",2} } },
- { "GeForce GTX 980", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
- { "GeForce GTX TITAN", { {"VW3",1}, {"WGS3",256}, {"WPT3",1} } },
- { "GeForce GTX TITAN X", { {"VW3",1}, {"WGS3",128}, {"WPT3",1} } },
- { "Tesla K20m", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
- { "Tesla K40m", { {"VW3",1}, {"WGS3",256}, {"WPT3",1} } },
- { "default", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
+ { "GeForce GTX TITAN", { {"VW3",1}, {"WGS3",16}, {"WPT3",16} } },
+ { "default", { {"VW3",1}, {"WGS3",16}, {"WPT3",16} } },
}
},
{ // Default
kDeviceTypeAll, "default", {
- { "default", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
+ { "default", { {"VW3",1}, {"WGS3",16}, {"WPT3",8} } },
}
},
}
@@ -204,38 +116,19 @@ const Database::DatabaseEntry Database::XgemvFastRotComplexDouble = {
"XgemvFastRot", Precision::kComplexDouble, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
- { "AMD Radeon R9 M370X Compute Engine", { {"VW3",1}, {"WGS3",128}, {"WPT3",1} } },
- { "Hawaii", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
- { "Oland", { {"VW3",1}, {"WGS3",256}, {"WPT3",1} } },
- { "Pitcairn", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
- { "Tahiti", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
- { "default", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
+ { "AMD Radeon R9 M370X Compute Engine", { {"VW3",4}, {"WGS3",32}, {"WPT3",16} } },
+ { "default", { {"VW3",4}, {"WGS3",32}, {"WPT3",16} } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
- { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"VW3",1}, {"WGS3",256}, {"WPT3",1} } },
- { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"VW3",2}, {"WGS3",256}, {"WPT3",2} } },
- { "default", { {"VW3",1}, {"WGS3",256}, {"WPT3",1} } },
- }
- },
- { // Intel accelerators
- kDeviceTypeAccelerator, "Intel", {
- { "Intel(R) Many Integrated Core Acceleration Card", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
- { "default", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
- }
- },
- { // NVIDIA GPUs
- kDeviceTypeGPU, "NVIDIA", {
- { "GRID K520", { {"VW3",1}, {"WGS3",256}, {"WPT3",1} } },
- { "GeForce GTX 480", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
- { "GeForce GTX 670", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
- { "default", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
+ { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"VW3",8}, {"WGS3",16}, {"WPT3",16} } },
+ { "default", { {"VW3",8}, {"WGS3",16}, {"WPT3",16} } },
}
},
{ // Default
kDeviceTypeAll, "default", {
- { "default", { {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
+ { "default", { {"VW3",4}, {"WGS3",16}, {"WPT3",16} } },
}
},
}