summaryrefslogtreecommitdiff
path: root/src/database/kernels
diff options
context:
space:
mode:
authorCedric Nugteren <web@cedricnugteren.nl>2016-09-21 21:32:18 +0200
committerCedric Nugteren <web@cedricnugteren.nl>2016-09-21 21:32:18 +0200
commit6aa652d6ea2389744195ae5cd19321325b2d71aa (patch)
tree58243cb4fbebb094c543028124412718cdeb7c97 /src/database/kernels
parent5004a435ff984bba0dff0147a5c4f6a04d703562 (diff)
parentb1929d8ce7022cacbd1812d62098ebd0681bc1ef (diff)
Merge branch 'development' into gemm_direct
Diffstat (limited to 'src/database/kernels')
-rw-r--r--src/database/kernels/copy.hpp39
-rw-r--r--src/database/kernels/pad.hpp41
-rw-r--r--src/database/kernels/padtranspose.hpp33
-rw-r--r--src/database/kernels/transpose.hpp39
-rw-r--r--src/database/kernels/xaxpy.hpp45
-rw-r--r--src/database/kernels/xdot.hpp33
-rw-r--r--src/database/kernels/xgemm.hpp18
-rw-r--r--src/database/kernels/xgemv.hpp25
-rw-r--r--src/database/kernels/xgemv_fast.hpp15
-rw-r--r--src/database/kernels/xgemv_fast_rot.hpp28
-rw-r--r--src/database/kernels/xger.hpp41
11 files changed, 205 insertions, 152 deletions
diff --git a/src/database/kernels/copy.hpp b/src/database/kernels/copy.hpp
index d592f110..a6b7dfe8 100644
--- a/src/database/kernels/copy.hpp
+++ b/src/database/kernels/copy.hpp
@@ -18,6 +18,7 @@ const Database::DatabaseEntry Database::CopyHalf = {
"Copy", Precision::kHalf, {
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
+ { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"COPY_DIMX",8}, {"COPY_DIMY",16}, {"COPY_VW",8}, {"COPY_WPT",4} } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",1} } },
{ "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",1} } },
}
@@ -41,7 +42,7 @@ const Database::DatabaseEntry Database::CopySingle = {
{ "Oland", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",2} } },
{ "Pitcairn", { {"COPY_DIMX",8}, {"COPY_DIMY",16}, {"COPY_VW",4}, {"COPY_WPT",1} } },
{ "Tahiti", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",2} } },
- { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } },
+ { "default", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",2} } },
}
},
{ // ARM GPUs
@@ -61,11 +62,12 @@ const Database::DatabaseEntry Database::CopySingle = {
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 530", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } },
- { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } },
+ { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"COPY_DIMX",32}, {"COPY_DIMY",16}, {"COPY_VW",4}, {"COPY_WPT",1} } },
+ { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"COPY_DIMX",32}, {"COPY_DIMY",16}, {"COPY_VW",4}, {"COPY_WPT",1} } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } },
{ "Iris", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } },
{ "Iris Pro", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",4} } },
- { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
+ { "default", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } },
}
},
{ // Intel accelerators
@@ -85,15 +87,15 @@ const Database::DatabaseEntry Database::CopySingle = {
{ "GeForce GTX 750 Ti", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } },
{ "GeForce GTX 980", { {"COPY_DIMX",32}, {"COPY_DIMY",16}, {"COPY_VW",1}, {"COPY_WPT",1} } },
{ "GeForce GTX TITAN", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",4} } },
- { "GeForce GTX TITAN X", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } },
+ { "GeForce GTX TITAN X", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } },
{ "Tesla K20m", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",4} } },
{ "Tesla K40m", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",2} } },
- { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
+ { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",16}, {"COPY_VW",4}, {"COPY_WPT",1} } },
}
},
{ // Default
kDeviceTypeAll, "default", {
- { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
+ { "default", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",2} } },
}
},
}
@@ -110,7 +112,7 @@ const Database::DatabaseEntry Database::CopyComplexSingle = {
{ "Oland", { {"COPY_DIMX",8}, {"COPY_DIMY",16}, {"COPY_VW",1}, {"COPY_WPT",1} } },
{ "Pitcairn", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } },
{ "Tahiti", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",2} } },
- { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
+ { "default", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
}
},
{ // Intel CPUs
@@ -118,17 +120,18 @@ const Database::DatabaseEntry Database::CopyComplexSingle = {
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"COPY_DIMX",16}, {"COPY_DIMY",16}, {"COPY_VW",8}, {"COPY_WPT",1} } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",2} } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",8}, {"COPY_WPT",1} } },
- { "default", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } },
+ { "default", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",8}, {"COPY_WPT",1} } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 530", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } },
+ { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"COPY_DIMX",16}, {"COPY_DIMY",16}, {"COPY_VW",2}, {"COPY_WPT",2} } },
{ "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",4} } },
{ "Iris", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } },
{ "Iris Pro", { {"COPY_DIMX",32}, {"COPY_DIMY",16}, {"COPY_VW",1}, {"COPY_WPT",4} } },
- { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
+ { "default", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } },
}
},
{ // Intel accelerators
@@ -149,12 +152,12 @@ const Database::DatabaseEntry Database::CopyComplexSingle = {
{ "GeForce GTX TITAN X", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
{ "Tesla K20m", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",4} } },
{ "Tesla K40m", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
- { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
+ { "default", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
}
},
{ // Default
kDeviceTypeAll, "default", {
- { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
+ { "default", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
}
},
}
@@ -171,7 +174,7 @@ const Database::DatabaseEntry Database::CopyDouble = {
{ "Oland", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",8} } },
{ "Pitcairn", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
{ "Tahiti", { {"COPY_DIMX",8}, {"COPY_DIMY",32}, {"COPY_VW",2}, {"COPY_WPT",1} } },
- { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
+ { "default", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } },
}
},
{ // ARM GPUs
@@ -185,7 +188,7 @@ const Database::DatabaseEntry Database::CopyDouble = {
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",8}, {"COPY_WPT",1} } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"COPY_DIMX",16}, {"COPY_DIMY",32}, {"COPY_VW",2}, {"COPY_WPT",1} } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"COPY_DIMX",16}, {"COPY_DIMY",16}, {"COPY_VW",8}, {"COPY_WPT",1} } },
- { "default", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } },
+ { "default", { {"COPY_DIMX",16}, {"COPY_DIMY",16}, {"COPY_VW",8}, {"COPY_WPT",1} } },
}
},
{ // Intel accelerators
@@ -208,12 +211,12 @@ const Database::DatabaseEntry Database::CopyDouble = {
{ "GeForce GTX TITAN X", { {"COPY_DIMX",32}, {"COPY_DIMY",16}, {"COPY_VW",1}, {"COPY_WPT",1} } },
{ "Tesla K20m", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } },
{ "Tesla K40m", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",2} } },
- { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
+ { "default", { {"COPY_DIMX",32}, {"COPY_DIMY",32}, {"COPY_VW",2}, {"COPY_WPT",1} } },
}
},
{ // Default
kDeviceTypeAll, "default", {
- { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
+ { "default", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } },
}
},
}
@@ -230,7 +233,7 @@ const Database::DatabaseEntry Database::CopyComplexDouble = {
{ "Oland", { {"COPY_DIMX",8}, {"COPY_DIMY",16}, {"COPY_VW",1}, {"COPY_WPT",1} } },
{ "Pitcairn", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
{ "Tahiti", { {"COPY_DIMX",8}, {"COPY_DIMY",16}, {"COPY_VW",1}, {"COPY_WPT",1} } },
- { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
+ { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",16}, {"COPY_VW",1}, {"COPY_WPT",1} } },
}
},
{ // ARM GPUs
@@ -244,7 +247,7 @@ const Database::DatabaseEntry Database::CopyComplexDouble = {
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",8}, {"COPY_WPT",1} } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"COPY_DIMX",32}, {"COPY_DIMY",32}, {"COPY_VW",8}, {"COPY_WPT",1} } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",8}, {"COPY_WPT",1} } },
- { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",8}, {"COPY_WPT",1} } },
+ { "default", { {"COPY_DIMX",32}, {"COPY_DIMY",32}, {"COPY_VW",8}, {"COPY_WPT",1} } },
}
},
{ // Intel accelerators
@@ -272,7 +275,7 @@ const Database::DatabaseEntry Database::CopyComplexDouble = {
},
{ // Default
kDeviceTypeAll, "default", {
- { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
+ { "default", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
}
},
}
diff --git a/src/database/kernels/pad.hpp b/src/database/kernels/pad.hpp
index cd034f15..3cfabaf4 100644
--- a/src/database/kernels/pad.hpp
+++ b/src/database/kernels/pad.hpp
@@ -18,6 +18,7 @@ const Database::DatabaseEntry Database::PadHalf = {
"Pad", Precision::kHalf, {
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
+ { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
{ "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
}
@@ -41,7 +42,7 @@ const Database::DatabaseEntry Database::PadSingle = {
{ "Oland", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } },
{ "Pitcairn", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } },
{ "Tahiti", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } },
- { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
+ { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } },
}
},
{ // ARM GPUs
@@ -55,17 +56,18 @@ const Database::DatabaseEntry Database::PadSingle = {
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"PAD_DIMX",16}, {"PAD_DIMY",32}, {"PAD_WPTX",4}, {"PAD_WPTY",4} } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } },
- { "default", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } },
+ { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",4}, {"PAD_WPTY",2} } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 530", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",4} } },
- { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
+ { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",4} } },
+ { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
{ "Iris", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
{ "Iris Pro", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
- { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
+ { "default", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
}
},
{ // Intel accelerators
@@ -88,12 +90,12 @@ const Database::DatabaseEntry Database::PadSingle = {
{ "GeForce GTX TITAN X", { {"PAD_DIMX",16}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
{ "Tesla K20m", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
{ "Tesla K40m", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
- { "default", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
+ { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } },
}
},
{ // Default
kDeviceTypeAll, "default", {
- { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
+ { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
}
},
}
@@ -110,7 +112,7 @@ const Database::DatabaseEntry Database::PadComplexSingle = {
{ "Oland", { {"PAD_DIMX",8}, {"PAD_DIMY",32}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
{ "Pitcairn", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } },
{ "Tahiti", { {"PAD_DIMX",16}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
- { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
+ { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
}
},
{ // ARM GPUs
@@ -124,17 +126,18 @@ const Database::DatabaseEntry Database::PadComplexSingle = {
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",2} } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"PAD_DIMX",32}, {"PAD_DIMY",32}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } },
- { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
+ { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 530", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } },
+ { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
{ "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",4} } },
{ "Iris", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",2}, {"PAD_WPTY",4} } },
{ "Iris Pro", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
- { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
+ { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",4} } },
}
},
{ // Intel accelerators
@@ -157,12 +160,12 @@ const Database::DatabaseEntry Database::PadComplexSingle = {
{ "GeForce GTX TITAN X", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
{ "Tesla K20m", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } },
{ "Tesla K40m", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
- { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
+ { "default", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
}
},
{ // Default
kDeviceTypeAll, "default", {
- { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
+ { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
}
},
}
@@ -179,7 +182,7 @@ const Database::DatabaseEntry Database::PadDouble = {
{ "Oland", { {"PAD_DIMX",8}, {"PAD_DIMY",32}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
{ "Pitcairn", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } },
{ "Tahiti", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
- { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
+ { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } },
}
},
{ // ARM GPUs
@@ -193,7 +196,7 @@ const Database::DatabaseEntry Database::PadDouble = {
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"PAD_DIMX",32}, {"PAD_DIMY",32}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
- { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
+ { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } },
}
},
{ // Intel accelerators
@@ -216,12 +219,12 @@ const Database::DatabaseEntry Database::PadDouble = {
{ "GeForce GTX TITAN X", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
{ "Tesla K20m", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
{ "Tesla K40m", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } },
- { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
+ { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
}
},
{ // Default
kDeviceTypeAll, "default", {
- { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
+ { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
}
},
}
@@ -238,7 +241,7 @@ const Database::DatabaseEntry Database::PadComplexDouble = {
{ "Oland", { {"PAD_DIMX",8}, {"PAD_DIMY",16}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
{ "Pitcairn", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
{ "Tahiti", { {"PAD_DIMX",8}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
- { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
+ { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
}
},
{ // ARM GPUs
@@ -252,7 +255,7 @@ const Database::DatabaseEntry Database::PadComplexDouble = {
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"PAD_DIMX",16}, {"PAD_DIMY",32}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
- { "default", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
+ { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } },
}
},
{ // Intel accelerators
@@ -275,12 +278,12 @@ const Database::DatabaseEntry Database::PadComplexDouble = {
{ "GeForce GTX TITAN X", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
{ "Tesla K20m", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } },
{ "Tesla K40m", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
- { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
+ { "default", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
}
},
{ // Default
kDeviceTypeAll, "default", {
- { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
+ { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
}
},
}
diff --git a/src/database/kernels/padtranspose.hpp b/src/database/kernels/padtranspose.hpp
index c2034c3e..88bd4ea7 100644
--- a/src/database/kernels/padtranspose.hpp
+++ b/src/database/kernels/padtranspose.hpp
@@ -18,6 +18,7 @@ const Database::DatabaseEntry Database::PadtransposeHalf = {
"Padtranspose", Precision::kHalf, {
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
+ { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",1} } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",1} } },
{ "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",1} } },
}
@@ -55,12 +56,13 @@ const Database::DatabaseEntry Database::PadtransposeSingle = {
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",8} } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"PADTRA_PAD",0}, {"PADTRA_TILE",32}, {"PADTRA_WPT",1} } },
- { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",1} } },
+ { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",8} } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 530", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } },
+ { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",4} } },
{ "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } },
{ "Iris", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } },
@@ -88,12 +90,12 @@ const Database::DatabaseEntry Database::PadtransposeSingle = {
{ "GeForce GTX TITAN X", { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",1} } },
{ "Tesla K20m", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } },
{ "Tesla K40m", { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",2} } },
- { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
+ { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",2} } },
}
},
{ // Default
kDeviceTypeAll, "default", {
- { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",1} } },
+ { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } },
}
},
}
@@ -110,7 +112,7 @@ const Database::DatabaseEntry Database::PadtransposeComplexSingle = {
{ "Oland", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } },
{ "Pitcairn", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } },
{ "Tahiti", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } },
- { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",2} } },
+ { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } },
}
},
{ // ARM GPUs
@@ -130,11 +132,12 @@ const Database::DatabaseEntry Database::PadtransposeComplexSingle = {
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 530", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } },
+ { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } },
{ "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",4} } },
{ "Iris", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } },
{ "Iris Pro", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } },
- { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } },
+ { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } },
}
},
{ // Intel accelerators
@@ -157,12 +160,12 @@ const Database::DatabaseEntry Database::PadtransposeComplexSingle = {
{ "GeForce GTX TITAN X", { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",1} } },
{ "Tesla K20m", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
{ "Tesla K40m", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
- { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
+ { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
}
},
{ // Default
kDeviceTypeAll, "default", {
- { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",1} } },
+ { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } },
}
},
}
@@ -179,7 +182,7 @@ const Database::DatabaseEntry Database::PadtransposeDouble = {
{ "Oland", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",4} } },
{ "Pitcairn", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } },
{ "Tahiti", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } },
- { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",2} } },
+ { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",4} } },
}
},
{ // ARM GPUs
@@ -193,7 +196,7 @@ const Database::DatabaseEntry Database::PadtransposeDouble = {
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"PADTRA_PAD",1}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",8} } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",1} } },
- { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",1} } },
+ { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } },
}
},
{ // Intel accelerators
@@ -216,12 +219,12 @@ const Database::DatabaseEntry Database::PadtransposeDouble = {
{ "GeForce GTX TITAN X", { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",1} } },
{ "Tesla K20m", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
{ "Tesla K40m", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
- { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
+ { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
}
},
{ // Default
kDeviceTypeAll, "default", {
- { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",1} } },
+ { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } },
}
},
}
@@ -238,7 +241,7 @@ const Database::DatabaseEntry Database::PadtransposeComplexDouble = {
{ "Oland", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } },
{ "Pitcairn", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } },
{ "Tahiti", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",2} } },
- { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",2} } },
+ { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } },
}
},
{ // ARM GPUs
@@ -252,7 +255,7 @@ const Database::DatabaseEntry Database::PadtransposeComplexDouble = {
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"PADTRA_PAD",1}, {"PADTRA_TILE",8}, {"PADTRA_WPT",2} } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"PADTRA_PAD",1}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"PADTRA_PAD",1}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } },
- { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",8}, {"PADTRA_WPT",2} } },
+ { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } },
}
},
{ // Intel accelerators
@@ -275,12 +278,12 @@ const Database::DatabaseEntry Database::PadtransposeComplexDouble = {
{ "GeForce GTX TITAN X", { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",1} } },
{ "Tesla K20m", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
{ "Tesla K40m", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
- { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
+ { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
}
},
{ // Default
kDeviceTypeAll, "default", {
- { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",1} } },
+ { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",2} } },
}
},
}
diff --git a/src/database/kernels/transpose.hpp b/src/database/kernels/transpose.hpp
index 8e852c4b..0e1b608e 100644
--- a/src/database/kernels/transpose.hpp
+++ b/src/database/kernels/transpose.hpp
@@ -18,6 +18,7 @@ const Database::DatabaseEntry Database::TransposeHalf = {
"Transpose", Precision::kHalf, {
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
+ { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",8} } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } },
{ "default", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } },
}
@@ -41,7 +42,7 @@ const Database::DatabaseEntry Database::TransposeSingle = {
{ "Oland", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } },
{ "Pitcairn", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } },
{ "Tahiti", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } },
- { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } },
+ { "default", { {"TRA_DIM",4}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } },
}
},
{ // ARM GPUs
@@ -61,11 +62,12 @@ const Database::DatabaseEntry Database::TransposeSingle = {
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 530", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } },
- { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } },
+ { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } },
+ { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } },
{ "Iris", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } },
{ "Iris Pro", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } },
- { "default", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } },
+ { "default", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } },
}
},
{ // Intel accelerators
@@ -88,12 +90,12 @@ const Database::DatabaseEntry Database::TransposeSingle = {
{ "GeForce GTX TITAN X", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } },
{ "Tesla K20m", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } },
{ "Tesla K40m", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } },
- { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
+ { "default", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } },
}
},
{ // Default
kDeviceTypeAll, "default", {
- { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
+ { "default", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } },
}
},
}
@@ -110,7 +112,7 @@ const Database::DatabaseEntry Database::TransposeComplexSingle = {
{ "Oland", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } },
{ "Pitcairn", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } },
{ "Tahiti", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } },
- { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } },
+ { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } },
}
},
{ // ARM GPUs
@@ -124,17 +126,18 @@ const Database::DatabaseEntry Database::TransposeComplexSingle = {
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"TRA_DIM",4}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",8} } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } },
- { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } },
+ { "default", { {"TRA_DIM",4}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",8} } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 530", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } },
+ { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } },
{ "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } },
{ "Iris", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } },
{ "Iris Pro", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } },
- { "default", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } },
+ { "default", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } },
}
},
{ // NVIDIA GPUs
@@ -151,12 +154,12 @@ const Database::DatabaseEntry Database::TransposeComplexSingle = {
{ "GeForce GTX TITAN X", { {"TRA_DIM",32}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
{ "Tesla K20m", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
{ "Tesla K40m", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
- { "default", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
+ { "default", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
}
},
{ // Default
kDeviceTypeAll, "default", {
- { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
+ { "default", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
}
},
}
@@ -173,7 +176,7 @@ const Database::DatabaseEntry Database::TransposeDouble = {
{ "Oland", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } },
{ "Pitcairn", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } },
{ "Tahiti", { {"TRA_DIM",4}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } },
- { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } },
+ { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } },
}
},
{ // ARM GPUs
@@ -187,7 +190,7 @@ const Database::DatabaseEntry Database::TransposeDouble = {
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"TRA_DIM",4}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",8} } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",8} } },
- { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } },
+ { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",8} } },
}
},
{ // Intel accelerators
@@ -210,12 +213,12 @@ const Database::DatabaseEntry Database::TransposeDouble = {
{ "GeForce GTX TITAN X", { {"TRA_DIM",32}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
{ "Tesla K20m", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } },
{ "Tesla K40m", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } },
- { "default", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
+ { "default", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } },
}
},
{ // Default
kDeviceTypeAll, "default", {
- { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
+ { "default", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } },
}
},
}
@@ -232,7 +235,7 @@ const Database::DatabaseEntry Database::TransposeComplexDouble = {
{ "Oland", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } },
{ "Pitcairn", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } },
{ "Tahiti", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } },
- { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } },
+ { "default", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } },
}
},
{ // ARM GPUs
@@ -246,7 +249,7 @@ const Database::DatabaseEntry Database::TransposeComplexDouble = {
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"TRA_DIM",4}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } },
- { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } },
+ { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } },
}
},
{ // NVIDIA GPUs
@@ -263,12 +266,12 @@ const Database::DatabaseEntry Database::TransposeComplexDouble = {
{ "GeForce GTX TITAN X", { {"TRA_DIM",32}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
{ "Tesla K20m", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
{ "Tesla K40m", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
- { "default", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
+ { "default", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
}
},
{ // Default
kDeviceTypeAll, "default", {
- { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
+ { "default", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } },
}
},
}
diff --git a/src/database/kernels/xaxpy.hpp b/src/database/kernels/xaxpy.hpp
index 905ee084..9c1bcd99 100644
--- a/src/database/kernels/xaxpy.hpp
+++ b/src/database/kernels/xaxpy.hpp
@@ -18,13 +18,14 @@ const Database::DatabaseEntry Database::XaxpyHalf = {
"Xaxpy", Precision::kHalf, {
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
+ { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"VW",1}, {"WGS",64}, {"WPT",1} } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { {"VW",4}, {"WGS",512}, {"WPT",8} } },
- { "default", { {"VW",4}, {"WGS",512}, {"WPT",8} } },
+ { "default", { {"VW",8}, {"WGS",64}, {"WPT",1} } },
}
},
{ // Default
kDeviceTypeAll, "default", {
- { "default", { {"VW",4}, {"WGS",512}, {"WPT",8} } },
+ { "default", { {"VW",8}, {"WGS",64}, {"WPT",1} } },
}
},
}
@@ -41,7 +42,7 @@ const Database::DatabaseEntry Database::XaxpySingle = {
{ "Oland", { {"VW",1}, {"WGS",128}, {"WPT",1} } },
{ "Pitcairn", { {"VW",2}, {"WGS",128}, {"WPT",1} } },
{ "Tahiti", { {"VW",2}, {"WGS",64}, {"WPT",1} } },
- { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } },
+ { "default", { {"VW",2}, {"WGS",256}, {"WPT",1} } },
}
},
{ // ARM GPUs
@@ -55,13 +56,14 @@ const Database::DatabaseEntry Database::XaxpySingle = {
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"VW",1}, {"WGS",512}, {"WPT",1} } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"VW",4}, {"WGS",256}, {"WPT",1} } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"VW",1}, {"WGS",128}, {"WPT",1} } },
- { "default", { {"VW",1}, {"WGS",128}, {"WPT",1} } },
+ { "default", { {"VW",2}, {"WGS",256}, {"WPT",1} } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 530", { {"VW",1}, {"WGS",128}, {"WPT",1} } },
- { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"VW",8}, {"WGS",256}, {"WPT",1} } },
+ { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"VW",1}, {"WGS",256}, {"WPT",1} } },
+ { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"VW",1}, {"WGS",64}, {"WPT",1} } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { {"VW",1}, {"WGS",512}, {"WPT",2} } },
{ "Iris", { {"VW",1}, {"WGS",64}, {"WPT",1} } },
{ "Iris Pro", { {"VW",1}, {"WGS",128}, {"WPT",2} } },
@@ -77,10 +79,10 @@ const Database::DatabaseEntry Database::XaxpySingle = {
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { {"VW",2}, {"WGS",64}, {"WPT",1} } },
- { "GeForce GTX 1070", { {"VW",2}, {"WGS",64}, {"WPT",1} } },
- { "GeForce GTX 480", { {"VW",4}, {"WGS",64}, {"WPT",1} } },
+ { "GeForce GTX 1070", { {"VW",1}, {"WGS",64}, {"WPT",4} } },
+ { "GeForce GTX 480", { {"VW",2}, {"WGS",128}, {"WPT",1} } },
{ "GeForce GTX 670", { {"VW",2}, {"WGS",64}, {"WPT",1} } },
- { "GeForce GTX 680", { {"VW",2}, {"WGS",64}, {"WPT",1} } },
+ { "GeForce GTX 680", { {"VW",1}, {"WGS",128}, {"WPT",1} } },
{ "GeForce GTX 750", { {"VW",1}, {"WGS",64}, {"WPT",1} } },
{ "GeForce GTX 750 Ti", { {"VW",1}, {"WGS",1024}, {"WPT",1} } },
{ "GeForce GTX 980", { {"VW",1}, {"WGS",1024}, {"WPT",1} } },
@@ -88,12 +90,12 @@ const Database::DatabaseEntry Database::XaxpySingle = {
{ "GeForce GTX TITAN X", { {"VW",1}, {"WGS",64}, {"WPT",1} } },
{ "Tesla K20m", { {"VW",4}, {"WGS",128}, {"WPT",1} } },
{ "Tesla K40m", { {"VW",4}, {"WGS",128}, {"WPT",1} } },
- { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } },
+ { "default", { {"VW",4}, {"WGS",64}, {"WPT",1} } },
}
},
{ // Default
kDeviceTypeAll, "default", {
- { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } },
+ { "default", { {"VW",4}, {"WGS",64}, {"WPT",1} } },
}
},
}
@@ -110,7 +112,7 @@ const Database::DatabaseEntry Database::XaxpyComplexSingle = {
{ "Oland", { {"VW",1}, {"WGS",128}, {"WPT",1} } },
{ "Pitcairn", { {"VW",1}, {"WGS",64}, {"WPT",1} } },
{ "Tahiti", { {"VW",1}, {"WGS",64}, {"WPT",1} } },
- { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } },
+ { "default", { {"VW",1}, {"WGS",128}, {"WPT",1} } },
}
},
{ // ARM GPUs
@@ -124,17 +126,18 @@ const Database::DatabaseEntry Database::XaxpyComplexSingle = {
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"VW",4}, {"WGS",256}, {"WPT",1} } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"VW",1}, {"WGS",1024}, {"WPT",2} } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"VW",2}, {"WGS",1024}, {"WPT",1} } },
- { "default", { {"VW",1}, {"WGS",256}, {"WPT",1} } },
+ { "default", { {"VW",8}, {"WGS",1024}, {"WPT",1} } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 530", { {"VW",4}, {"WGS",64}, {"WPT",2} } },
+ { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"VW",1}, {"WGS",64}, {"WPT",1} } },
{ "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"VW",1}, {"WGS",64}, {"WPT",1} } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { {"VW",2}, {"WGS",512}, {"WPT",1} } },
{ "Iris", { {"VW",2}, {"WGS",128}, {"WPT",1} } },
{ "Iris Pro", { {"VW",1}, {"WGS",256}, {"WPT",8} } },
- { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } },
+ { "default", { {"VW",1}, {"WGS",256}, {"WPT",2} } },
}
},
{ // Intel accelerators
@@ -157,12 +160,12 @@ const Database::DatabaseEntry Database::XaxpyComplexSingle = {
{ "GeForce GTX TITAN X", { {"VW",1}, {"WGS",512}, {"WPT",1} } },
{ "Tesla K20m", { {"VW",1}, {"WGS",128}, {"WPT",1} } },
{ "Tesla K40m", { {"VW",1}, {"WGS",128}, {"WPT",1} } },
- { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } },
+ { "default", { {"VW",1}, {"WGS",256}, {"WPT",1} } },
}
},
{ // Default
kDeviceTypeAll, "default", {
- { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } },
+ { "default", { {"VW",1}, {"WGS",128}, {"WPT",1} } },
}
},
}
@@ -193,7 +196,7 @@ const Database::DatabaseEntry Database::XaxpyDouble = {
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"VW",1}, {"WGS",1024}, {"WPT",1} } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"VW",8}, {"WGS",64}, {"WPT",1} } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"VW",8}, {"WGS",2048}, {"WPT",1} } },
- { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } },
+ { "default", { {"VW",8}, {"WGS",512}, {"WPT",1} } },
}
},
{ // Intel accelerators
@@ -206,7 +209,7 @@ const Database::DatabaseEntry Database::XaxpyDouble = {
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { {"VW",1}, {"WGS",64}, {"WPT",1} } },
{ "GeForce GTX 1070", { {"VW",1}, {"WGS",64}, {"WPT",8} } },
- { "GeForce GTX 480", { {"VW",2}, {"WGS",64}, {"WPT",1} } },
+ { "GeForce GTX 480", { {"VW",1}, {"WGS",128}, {"WPT",1} } },
{ "GeForce GTX 670", { {"VW",1}, {"WGS",64}, {"WPT",1} } },
{ "GeForce GTX 680", { {"VW",1}, {"WGS",64}, {"WPT",1} } },
{ "GeForce GTX 750", { {"VW",1}, {"WGS",128}, {"WPT",1} } },
@@ -216,7 +219,7 @@ const Database::DatabaseEntry Database::XaxpyDouble = {
{ "GeForce GTX TITAN X", { {"VW",1}, {"WGS",512}, {"WPT",1} } },
{ "Tesla K20m", { {"VW",2}, {"WGS",128}, {"WPT",1} } },
{ "Tesla K40m", { {"VW",2}, {"WGS",128}, {"WPT",1} } },
- { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } },
+ { "default", { {"VW",1}, {"WGS",128}, {"WPT",1} } },
}
},
{ // Default
@@ -238,7 +241,7 @@ const Database::DatabaseEntry Database::XaxpyComplexDouble = {
{ "Oland", { {"VW",1}, {"WGS",256}, {"WPT",1} } },
{ "Pitcairn", { {"VW",1}, {"WGS",128}, {"WPT",1} } },
{ "Tahiti", { {"VW",1}, {"WGS",128}, {"WPT",1} } },
- { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } },
+ { "default", { {"VW",1}, {"WGS",128}, {"WPT",1} } },
}
},
{ // ARM GPUs
@@ -252,7 +255,7 @@ const Database::DatabaseEntry Database::XaxpyComplexDouble = {
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"VW",8}, {"WGS",128}, {"WPT",1} } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"VW",8}, {"WGS",512}, {"WPT",1} } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"VW",1}, {"WGS",256}, {"WPT",1} } },
- { "default", { {"VW",1}, {"WGS",128}, {"WPT",1} } },
+ { "default", { {"VW",4}, {"WGS",1024}, {"WPT",1} } },
}
},
{ // Intel accelerators
@@ -280,7 +283,7 @@ const Database::DatabaseEntry Database::XaxpyComplexDouble = {
},
{ // Default
kDeviceTypeAll, "default", {
- { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } },
+ { "default", { {"VW",1}, {"WGS",128}, {"WPT",1} } },
}
},
}
diff --git a/src/database/kernels/xdot.hpp b/src/database/kernels/xdot.hpp
index e36dd8ca..987a990d 100644
--- a/src/database/kernels/xdot.hpp
+++ b/src/database/kernels/xdot.hpp
@@ -18,6 +18,7 @@ const Database::DatabaseEntry Database::XdotHalf = {
"Xdot", Precision::kHalf, {
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
+ { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"WGS1",256}, {"WGS2",32} } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { {"WGS1",32}, {"WGS2",32} } },
{ "default", { {"WGS1",32}, {"WGS2",32} } },
}
@@ -37,7 +38,6 @@ const Database::DatabaseEntry Database::XdotSingle = {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { {"WGS1",128}, {"WGS2",32} } },
- { "Hawaii", { {"WGS1",256}, {"WGS2",32} } },
{ "Oland", { {"WGS1",256}, {"WGS2",32} } },
{ "Pitcairn", { {"WGS1",128}, {"WGS2",32} } },
{ "Tahiti", { {"WGS1",128}, {"WGS2",32} } },
@@ -53,10 +53,11 @@ const Database::DatabaseEntry Database::XdotSingle = {
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 530", { {"WGS1",64}, {"WGS2",32} } },
- { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"WGS1",32}, {"WGS2",32} } },
+ { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"WGS1",256}, {"WGS2",32} } },
+ { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"WGS1",64}, {"WGS2",32} } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { {"WGS1",64}, {"WGS2",32} } },
{ "Iris Pro", { {"WGS1",512}, {"WGS2",64} } },
- { "default", { {"WGS1",32}, {"WGS2",32} } },
+ { "default", { {"WGS1",64}, {"WGS2",32} } },
}
},
{ // NVIDIA GPUs
@@ -70,12 +71,12 @@ const Database::DatabaseEntry Database::XdotSingle = {
{ "GeForce GTX 980", { {"WGS1",256}, {"WGS2",32} } },
{ "GeForce GTX TITAN X", { {"WGS1",256}, {"WGS2",32} } },
{ "Tesla K20m", { {"WGS1",1024}, {"WGS2",32} } },
- { "default", { {"WGS1",128}, {"WGS2",32} } },
+ { "default", { {"WGS1",256}, {"WGS2",256} } },
}
},
{ // Default
kDeviceTypeAll, "default", {
- { "default", { {"WGS1",32}, {"WGS2",32} } },
+ { "default", { {"WGS1",256}, {"WGS2",32} } },
}
},
}
@@ -88,11 +89,10 @@ const Database::DatabaseEntry Database::XdotComplexSingle = {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { {"WGS1",64}, {"WGS2",32} } },
- { "Hawaii", { {"WGS1",256}, {"WGS2",32} } },
{ "Oland", { {"WGS1",128}, {"WGS2",32} } },
{ "Pitcairn", { {"WGS1",256}, {"WGS2",32} } },
{ "Tahiti", { {"WGS1",64}, {"WGS2",32} } },
- { "default", { {"WGS1",64}, {"WGS2",32} } },
+ { "default", { {"WGS1",128}, {"WGS2",32} } },
}
},
{ // Intel CPUs
@@ -104,6 +104,7 @@ const Database::DatabaseEntry Database::XdotComplexSingle = {
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 530", { {"WGS1",256}, {"WGS2",32} } },
+ { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"WGS1",256}, {"WGS2",32} } },
{ "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"WGS1",32}, {"WGS2",32} } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { {"WGS1",32}, {"WGS2",32} } },
{ "Iris Pro", { {"WGS1",32}, {"WGS2",32} } },
@@ -121,12 +122,12 @@ const Database::DatabaseEntry Database::XdotComplexSingle = {
{ "GeForce GTX 980", { {"WGS1",256}, {"WGS2",64} } },
{ "GeForce GTX TITAN X", { {"WGS1",256}, {"WGS2",32} } },
{ "Tesla K20m", { {"WGS1",512}, {"WGS2",32} } },
- { "default", { {"WGS1",64}, {"WGS2",32} } },
+ { "default", { {"WGS1",512}, {"WGS2",64} } },
}
},
{ // Default
kDeviceTypeAll, "default", {
- { "default", { {"WGS1",32}, {"WGS2",32} } },
+ { "default", { {"WGS1",256}, {"WGS2",32} } },
}
},
}
@@ -139,11 +140,10 @@ const Database::DatabaseEntry Database::XdotDouble = {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { {"WGS1",64}, {"WGS2",128} } },
- { "Hawaii", { {"WGS1",256}, {"WGS2",32} } },
{ "Oland", { {"WGS1",256}, {"WGS2",32} } },
{ "Pitcairn", { {"WGS1",128}, {"WGS2",32} } },
{ "Tahiti", { {"WGS1",256}, {"WGS2",32} } },
- { "default", { {"WGS1",64}, {"WGS2",32} } },
+ { "default", { {"WGS1",128}, {"WGS2",32} } },
}
},
{ // Intel CPUs
@@ -163,12 +163,12 @@ const Database::DatabaseEntry Database::XdotDouble = {
{ "GeForce GTX 980", { {"WGS1",128}, {"WGS2",32} } },
{ "GeForce GTX TITAN X", { {"WGS1",256}, {"WGS2",32} } },
{ "Tesla K20m", { {"WGS1",512}, {"WGS2",32} } },
- { "default", { {"WGS1",64}, {"WGS2",32} } },
+ { "default", { {"WGS1",256}, {"WGS2",64} } },
}
},
{ // Default
kDeviceTypeAll, "default", {
- { "default", { {"WGS1",64}, {"WGS2",32} } },
+ { "default", { {"WGS1",128}, {"WGS2",64} } },
}
},
}
@@ -181,11 +181,10 @@ const Database::DatabaseEntry Database::XdotComplexDouble = {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { {"WGS1",64}, {"WGS2",32} } },
- { "Hawaii", { {"WGS1",256}, {"WGS2",32} } },
{ "Oland", { {"WGS1",256}, {"WGS2",32} } },
{ "Pitcairn", { {"WGS1",256}, {"WGS2",32} } },
{ "Tahiti", { {"WGS1",256}, {"WGS2",32} } },
- { "default", { {"WGS1",64}, {"WGS2",32} } },
+ { "default", { {"WGS1",256}, {"WGS2",32} } },
}
},
{ // Intel CPUs
@@ -205,12 +204,12 @@ const Database::DatabaseEntry Database::XdotComplexDouble = {
{ "GeForce GTX 980", { {"WGS1",64}, {"WGS2",32} } },
{ "GeForce GTX TITAN X", { {"WGS1",128}, {"WGS2",32} } },
{ "Tesla K20m", { {"WGS1",128}, {"WGS2",32} } },
- { "default", { {"WGS1",64}, {"WGS2",32} } },
+ { "default", { {"WGS1",128}, {"WGS2",64} } },
}
},
{ // Default
kDeviceTypeAll, "default", {
- { "default", { {"WGS1",64}, {"WGS2",32} } },
+ { "default", { {"WGS1",256}, {"WGS2",64} } },
}
},
}
diff --git a/src/database/kernels/xgemm.hpp b/src/database/kernels/xgemm.hpp
index 61b7ff05..d19c55b5 100644
--- a/src/database/kernels/xgemm.hpp
+++ b/src/database/kernels/xgemm.hpp
@@ -14,6 +14,18 @@
namespace clblast {
// =================================================================================================
+const Database::DatabaseEntry Database::XgemmHalf = {
+ "Xgemm", Precision::kHalf, {
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",16}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
+ }
+ },
+ }
+};
+
+// =================================================================================================
+
const Database::DatabaseEntry Database::XgemmSingle = {
"Xgemm", Precision::kSingle, {
{ // AMD GPUs
@@ -43,6 +55,7 @@ const Database::DatabaseEntry Database::XgemmSingle = {
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 530", { {"KWG",32}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",128}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",4}, {"VWN",2} } },
+ { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"KWG",32}, {"KWI",8}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",4}, {"VWN",2} } },
{ "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"KWG",16}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",16}, {"NWG",128}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",2}, {"VWN",4} } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { {"KWG",32}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",128}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",1}, {"VWM",1}, {"VWN",8} } },
{ "Iris", { {"KWG",16}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",128}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",4}, {"VWN",1} } },
@@ -75,7 +88,7 @@ const Database::DatabaseEntry Database::XgemmSingle = {
},
{ // Default
kDeviceTypeAll, "default", {
- { "default", { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
+ { "default", { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",16}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
}
},
}
@@ -112,6 +125,7 @@ const Database::DatabaseEntry Database::XgemmComplexSingle = {
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 530", { {"KWG",16}, {"KWI",8}, {"MDIMA",8}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",32}, {"NDIMC",8}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",2}, {"VWN",1} } },
+ { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"KWG",16}, {"KWI",8}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",4}, {"VWN",4} } },
{ "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"KWG",32}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",2}, {"VWN",1} } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { {"KWG",32}, {"KWI",8}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",32}, {"SA",1}, {"SB",0}, {"STRM",0}, {"STRN",1}, {"VWM",4}, {"VWN",1} } },
{ "Iris", { {"KWG",32}, {"KWI",8}, {"MDIMA",32}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
@@ -156,7 +170,7 @@ const Database::DatabaseEntry Database::XgemmDouble = {
"Xgemm", Precision::kDouble, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
- { "AMD Radeon R9 M370X Compute Engine", { {"KWG",32}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",32}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",1}, {"VWM",2}, {"VWN",8} } },
+ { "AMD Radeon R9 M370X Compute Engine", { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",4}, {"VWN",4} } },
{ "Hawaii", { {"KWG",16}, {"KWI",8}, {"MDIMA",32}, {"MDIMC",8}, {"MWG",128}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",32}, {"SA",0}, {"SB",1}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",4} } },
{ "Oland", { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",16}, {"NDIMC",8}, {"NWG",16}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",1}, {"VWM",1}, {"VWN",1} } },
{ "Pitcairn", { {"KWG",32}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",16}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",2} } },
diff --git a/src/database/kernels/xgemv.hpp b/src/database/kernels/xgemv.hpp
index 6d680b06..e5e8845e 100644
--- a/src/database/kernels/xgemv.hpp
+++ b/src/database/kernels/xgemv.hpp
@@ -18,13 +18,14 @@ const Database::DatabaseEntry Database::XgemvHalf = {
"Xgemv", Precision::kHalf, {
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
+ { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"WGS1",64}, {"WPT1",1} } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { {"WGS1",128}, {"WPT1",1} } },
- { "default", { {"WGS1",128}, {"WPT1",1} } },
+ { "default", { {"WGS1",64}, {"WPT1",1} } },
}
},
{ // Default
kDeviceTypeAll, "default", {
- { "default", { {"WGS1",128}, {"WPT1",1} } },
+ { "default", { {"WGS1",64}, {"WPT1",1} } },
}
},
}
@@ -48,13 +49,14 @@ const Database::DatabaseEntry Database::XgemvSingle = {
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"WGS1",64}, {"WPT1",1} } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"WGS1",64}, {"WPT1",4} } },
- { "default", { {"WGS1",64}, {"WPT1",1} } },
+ { "default", { {"WGS1",64}, {"WPT1",4} } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 530", { {"WGS1",256}, {"WPT1",1} } },
- { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"WGS1",256}, {"WPT1",1} } },
+ { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"WGS1",64}, {"WPT1",1} } },
+ { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"WGS1",64}, {"WPT1",1} } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { {"WGS1",64}, {"WPT1",1} } },
{ "Iris", { {"WGS1",64}, {"WPT1",2} } },
{ "Iris Pro", { {"WGS1",256}, {"WPT1",2} } },
@@ -81,7 +83,7 @@ const Database::DatabaseEntry Database::XgemvSingle = {
{ "GeForce GTX TITAN X", { {"WGS1",256}, {"WPT1",1} } },
{ "Tesla K20m", { {"WGS1",128}, {"WPT1",1} } },
{ "Tesla K40m", { {"WGS1",256}, {"WPT1",1} } },
- { "default", { {"WGS1",64}, {"WPT1",1} } },
+ { "default", { {"WGS1",256}, {"WPT1",1} } },
}
},
{ // Default
@@ -116,6 +118,7 @@ const Database::DatabaseEntry Database::XgemvComplexSingle = {
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 530", { {"WGS1",64}, {"WPT1",1} } },
+ { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"WGS1",64}, {"WPT1",1} } },
{ "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"WGS1",128}, {"WPT1",1} } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { {"WGS1",64}, {"WPT1",1} } },
{ "Iris", { {"WGS1",256}, {"WPT1",1} } },
@@ -161,14 +164,14 @@ const Database::DatabaseEntry Database::XgemvDouble = {
{ "Oland", { {"WGS1",256}, {"WPT1",1} } },
{ "Pitcairn", { {"WGS1",256}, {"WPT1",1} } },
{ "Tahiti", { {"WGS1",256}, {"WPT1",1} } },
- { "default", { {"WGS1",64}, {"WPT1",1} } },
+ { "default", { {"WGS1",256}, {"WPT1",1} } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"WGS1",64}, {"WPT1",2} } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"WGS1",64}, {"WPT1",4} } },
- { "default", { {"WGS1",64}, {"WPT1",2} } },
+ { "default", { {"WGS1",64}, {"WPT1",4} } },
}
},
{ // Intel accelerators
@@ -191,12 +194,12 @@ const Database::DatabaseEntry Database::XgemvDouble = {
{ "GeForce GTX TITAN X", { {"WGS1",64}, {"WPT1",1} } },
{ "Tesla K20m", { {"WGS1",256}, {"WPT1",1} } },
{ "Tesla K40m", { {"WGS1",256}, {"WPT1",1} } },
- { "default", { {"WGS1",64}, {"WPT1",1} } },
+ { "default", { {"WGS1",128}, {"WPT1",1} } },
}
},
{ // Default
kDeviceTypeAll, "default", {
- { "default", { {"WGS1",64}, {"WPT1",1} } },
+ { "default", { {"WGS1",128}, {"WPT1",1} } },
}
},
}
@@ -220,7 +223,7 @@ const Database::DatabaseEntry Database::XgemvComplexDouble = {
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"WGS1",64}, {"WPT1",1} } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"WGS1",64}, {"WPT1",4} } },
- { "default", { {"WGS1",64}, {"WPT1",1} } },
+ { "default", { {"WGS1",64}, {"WPT1",4} } },
}
},
{ // Intel accelerators
@@ -234,7 +237,7 @@ const Database::DatabaseEntry Database::XgemvComplexDouble = {
{ "GRID K520", { {"WGS1",128}, {"WPT1",1} } },
{ "GeForce GTX 480", { {"WGS1",64}, {"WPT1",1} } },
{ "GeForce GTX 670", { {"WGS1",128}, {"WPT1",1} } },
- { "default", { {"WGS1",64}, {"WPT1",1} } },
+ { "default", { {"WGS1",128}, {"WPT1",1} } },
}
},
{ // Default
diff --git a/src/database/kernels/xgemv_fast.hpp b/src/database/kernels/xgemv_fast.hpp
index 65b15030..52af628c 100644
--- a/src/database/kernels/xgemv_fast.hpp
+++ b/src/database/kernels/xgemv_fast.hpp
@@ -18,13 +18,14 @@ const Database::DatabaseEntry Database::XgemvFastHalf = {
"XgemvFast", Precision::kHalf, {
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
+ { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"VW2",1}, {"WGS2",16}, {"WPT2",1} } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { {"VW2",2}, {"WGS2",128}, {"WPT2",2} } },
- { "default", { {"VW2",2}, {"WGS2",128}, {"WPT2",2} } },
+ { "default", { {"VW2",1}, {"WGS2",16}, {"WPT2",1} } },
}
},
{ // Default
kDeviceTypeAll, "default", {
- { "default", { {"VW2",2}, {"WGS2",128}, {"WPT2",2} } },
+ { "default", { {"VW2",1}, {"WGS2",16}, {"WPT2",1} } },
}
},
}
@@ -48,17 +49,18 @@ const Database::DatabaseEntry Database::XgemvFastSingle = {
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"VW2",4}, {"WGS2",128}, {"WPT2",4} } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"VW2",1}, {"WGS2",64}, {"WPT2",4} } },
- { "default", { {"VW2",1}, {"WGS2",64}, {"WPT2",4} } },
+ { "default", { {"VW2",4}, {"WGS2",64}, {"WPT2",4} } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 530", { {"VW2",1}, {"WGS2",256}, {"WPT2",1} } },
+ { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"VW2",2}, {"WGS2",32}, {"WPT2",2} } },
{ "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"VW2",4}, {"WGS2",128}, {"WPT2",4} } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { {"VW2",1}, {"WGS2",256}, {"WPT2",1} } },
{ "Iris", { {"VW2",1}, {"WGS2",128}, {"WPT2",2} } },
{ "Iris Pro", { {"VW2",1}, {"WGS2",128}, {"WPT2",2} } },
- { "default", { {"VW2",1}, {"WGS2",128}, {"WPT2",1} } },
+ { "default", { {"VW2",2}, {"WGS2",64}, {"WPT2",2} } },
}
},
{ // Intel accelerators
@@ -81,7 +83,7 @@ const Database::DatabaseEntry Database::XgemvFastSingle = {
{ "GeForce GTX TITAN X", { {"VW2",1}, {"WGS2",64}, {"WPT2",1} } },
{ "Tesla K20m", { {"VW2",1}, {"WGS2",256}, {"WPT2",1} } },
{ "Tesla K40m", { {"VW2",1}, {"WGS2",256}, {"WPT2",1} } },
- { "default", { {"VW2",1}, {"WGS2",64}, {"WPT2",1} } },
+ { "default", { {"VW2",1}, {"WGS2",256}, {"WPT2",1} } },
}
},
{ // Default
@@ -116,6 +118,7 @@ const Database::DatabaseEntry Database::XgemvFastComplexSingle = {
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 530", { {"VW2",2}, {"WGS2",128}, {"WPT2",2} } },
+ { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"VW2",1}, {"WGS2",32}, {"WPT2",2} } },
{ "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"VW2",2}, {"WGS2",128}, {"WPT2",2} } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { {"VW2",1}, {"WGS2",64}, {"WPT2",1} } },
{ "Iris", { {"VW2",1}, {"WGS2",64}, {"WPT2",1} } },
@@ -189,7 +192,7 @@ const Database::DatabaseEntry Database::XgemvFastDouble = {
{ "GeForce GTX TITAN X", { {"VW2",1}, {"WGS2",128}, {"WPT2",1} } },
{ "Tesla K20m", { {"VW2",1}, {"WGS2",128}, {"WPT2",1} } },
{ "Tesla K40m", { {"VW2",1}, {"WGS2",256}, {"WPT2",1} } },
- { "default", { {"VW2",1}, {"WGS2",64}, {"WPT2",1} } },
+ { "default", { {"VW2",1}, {"WGS2",256}, {"WPT2",1} } },
}
},
{ // Default
diff --git a/src/database/kernels/xgemv_fast_rot.hpp b/src/database/kernels/xgemv_fast_rot.hpp
index 9822fb20..328094e1 100644
--- a/src/database/kernels/xgemv_fast_rot.hpp
+++ b/src/database/kernels/xgemv_fast_rot.hpp
@@ -14,6 +14,18 @@
namespace clblast {
// =================================================================================================
+const Database::DatabaseEntry Database::XgemvFastRotHalf = {
+ "XgemvFastRot", Precision::kHalf, {
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { {"VW3",8}, {"WGS3",32}, {"WPT3",32} } },
+ }
+ },
+ }
+};
+
+// =================================================================================================
+
const Database::DatabaseEntry Database::XgemvFastRotSingle = {
"XgemvFastRot", Precision::kSingle, {
{ // AMD GPUs
@@ -30,9 +42,11 @@ const Database::DatabaseEntry Database::XgemvFastRotSingle = {
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
+ { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"VW3",8}, {"WGS3",64}, {"WPT3",32} } },
+ { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"VW3",4}, {"WGS3",64}, {"WPT3",16} } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { {"VW3",4}, {"WGS3",128}, {"WPT3",16} } },
{ "Iris Pro", { {"VW3",4}, {"WGS3",32}, {"WPT3",16} } },
- { "default", { {"VW3",4}, {"WGS3",32}, {"WPT3",16} } },
+ { "default", { {"VW3",8}, {"WGS3",32}, {"WPT3",32} } },
}
},
{ // NVIDIA GPUs
@@ -43,7 +57,7 @@ const Database::DatabaseEntry Database::XgemvFastRotSingle = {
},
{ // Default
kDeviceTypeAll, "default", {
- { "default", { {"VW3",1}, {"WGS3",16}, {"WPT3",8} } },
+ { "default", { {"VW3",8}, {"WGS3",32}, {"WPT3",32} } },
}
},
}
@@ -67,14 +81,16 @@ const Database::DatabaseEntry Database::XgemvFastRotComplexSingle = {
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
+ { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"VW3",2}, {"WGS3",16}, {"WPT3",16} } },
+ { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"VW3",4}, {"WGS3",128}, {"WPT3",8} } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { {"VW3",2}, {"WGS3",32}, {"WPT3",16} } },
{ "Iris Pro", { {"VW3",4}, {"WGS3",16}, {"WPT3",16} } },
- { "default", { {"VW3",2}, {"WGS3",16}, {"WPT3",16} } },
+ { "default", { {"VW3",2}, {"WGS3",32}, {"WPT3",8} } },
}
},
{ // Default
kDeviceTypeAll, "default", {
- { "default", { {"VW3",2}, {"WGS3",16}, {"WPT3",16} } },
+ { "default", { {"VW3",2}, {"WGS3",32}, {"WPT3",16} } },
}
},
}
@@ -104,7 +120,7 @@ const Database::DatabaseEntry Database::XgemvFastRotDouble = {
},
{ // Default
kDeviceTypeAll, "default", {
- { "default", { {"VW3",1}, {"WGS3",16}, {"WPT3",8} } },
+ { "default", { {"VW3",4}, {"WGS3",16}, {"WPT3",16} } },
}
},
}
@@ -128,7 +144,7 @@ const Database::DatabaseEntry Database::XgemvFastRotComplexDouble = {
},
{ // Default
kDeviceTypeAll, "default", {
- { "default", { {"VW3",4}, {"WGS3",16}, {"WPT3",16} } },
+ { "default", { {"VW3",8}, {"WGS3",32}, {"WPT3",16} } },
}
},
}
diff --git a/src/database/kernels/xger.hpp b/src/database/kernels/xger.hpp
index 216925fc..3e9c25c1 100644
--- a/src/database/kernels/xger.hpp
+++ b/src/database/kernels/xger.hpp
@@ -18,6 +18,7 @@ const Database::DatabaseEntry Database::XgerHalf = {
"Xger", Precision::kHalf, {
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
+ { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"WGS1",256}, {"WGS2",1}, {"WPT",2} } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { {"WGS1",64}, {"WGS2",1}, {"WPT",1} } },
{ "default", { {"WGS1",64}, {"WGS2",1}, {"WPT",1} } },
}
@@ -41,7 +42,7 @@ const Database::DatabaseEntry Database::XgerSingle = {
{ "Oland", { {"WGS1",32}, {"WGS2",4}, {"WPT",2} } },
{ "Pitcairn", { {"WGS1",64}, {"WGS2",1}, {"WPT",1} } },
{ "Tahiti", { {"WGS1",256}, {"WGS2",1}, {"WPT",1} } },
- { "default", { {"WGS1",32}, {"WGS2",1}, {"WPT",1} } },
+ { "default", { {"WGS1",32}, {"WGS2",4}, {"WPT",1} } },
}
},
{ // ARM GPUs
@@ -54,16 +55,17 @@ const Database::DatabaseEntry Database::XgerSingle = {
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"WGS1",128}, {"WGS2",2}, {"WPT",4} } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"WGS1",128}, {"WGS2",1}, {"WPT",4} } },
- { "default", { {"WGS1",128}, {"WGS2",1}, {"WPT",4} } },
+ { "default", { {"WGS1",128}, {"WGS2",8}, {"WPT",4} } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 530", { {"WGS1",32}, {"WGS2",1}, {"WPT",2} } },
- { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"WGS1",256}, {"WGS2",2}, {"WPT",2} } },
+ { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"WGS1",256}, {"WGS2",2}, {"WPT",2} } },
+ { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"WGS1",128}, {"WGS2",1}, {"WPT",2} } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { {"WGS1",8}, {"WGS2",8}, {"WPT",4} } },
{ "Iris Pro", { {"WGS1",64}, {"WGS2",1}, {"WPT",4} } },
- { "default", { {"WGS1",8}, {"WGS2",1}, {"WPT",2} } },
+ { "default", { {"WGS1",32}, {"WGS2",4}, {"WPT",2} } },
}
},
{ // NVIDIA GPUs
@@ -75,12 +77,12 @@ const Database::DatabaseEntry Database::XgerSingle = {
{ "GeForce GTX 680", { {"WGS1",128}, {"WGS2",1}, {"WPT",4} } },
{ "GeForce GTX 750", { {"WGS1",64}, {"WGS2",16}, {"WPT",4} } },
{ "GeForce GTX TITAN", { {"WGS1",32}, {"WGS2",4}, {"WPT",2} } },
- { "default", { {"WGS1",32}, {"WGS2",1}, {"WPT",1} } },
+ { "default", { {"WGS1",256}, {"WGS2",1}, {"WPT",4} } },
}
},
{ // Default
kDeviceTypeAll, "default", {
- { "default", { {"WGS1",8}, {"WGS2",1}, {"WPT",1} } },
+ { "default", { {"WGS1",32}, {"WGS2",4}, {"WPT",2} } },
}
},
}
@@ -97,7 +99,7 @@ const Database::DatabaseEntry Database::XgerComplexSingle = {
{ "Oland", { {"WGS1",4}, {"WGS2",8}, {"WPT",1} } },
{ "Pitcairn", { {"WGS1",128}, {"WGS2",2}, {"WPT",1} } },
{ "Tahiti", { {"WGS1",64}, {"WGS2",2}, {"WPT",1} } },
- { "default", { {"WGS1",4}, {"WGS2",1}, {"WPT",1} } },
+ { "default", { {"WGS1",256}, {"WGS2",1}, {"WPT",1} } },
}
},
{ // ARM GPUs
@@ -110,16 +112,17 @@ const Database::DatabaseEntry Database::XgerComplexSingle = {
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"WGS1",256}, {"WGS2",1}, {"WPT",4} } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"WGS1",512}, {"WGS2",4}, {"WPT",2} } },
- { "default", { {"WGS1",256}, {"WGS2",1}, {"WPT",2} } },
+ { "default", { {"WGS1",512}, {"WGS2",4}, {"WPT",2} } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 530", { {"WGS1",32}, {"WGS2",1}, {"WPT",2} } },
- { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"WGS1",128}, {"WGS2",4}, {"WPT",1} } },
+ { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"WGS1",128}, {"WGS2",2}, {"WPT",1} } },
+ { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { {"WGS1",512}, {"WGS2",1}, {"WPT",1} } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { {"WGS1",128}, {"WGS2",4}, {"WPT",2} } },
{ "Iris Pro", { {"WGS1",16}, {"WGS2",2}, {"WPT",4} } },
- { "default", { {"WGS1",16}, {"WGS2",1}, {"WPT",1} } },
+ { "default", { {"WGS1",64}, {"WGS2",1}, {"WPT",2} } },
}
},
{ // NVIDIA GPUs
@@ -131,12 +134,12 @@ const Database::DatabaseEntry Database::XgerComplexSingle = {
{ "GeForce GTX 680", { {"WGS1",32}, {"WGS2",4}, {"WPT",2} } },
{ "GeForce GTX 750", { {"WGS1",32}, {"WGS2",16}, {"WPT",4} } },
{ "GeForce GTX TITAN", { {"WGS1",16}, {"WGS2",16}, {"WPT",2} } },
- { "default", { {"WGS1",16}, {"WGS2",2}, {"WPT",2} } },
+ { "default", { {"WGS1",64}, {"WGS2",2}, {"WPT",2} } },
}
},
{ // Default
kDeviceTypeAll, "default", {
- { "default", { {"WGS1",4}, {"WGS2",1}, {"WPT",1} } },
+ { "default", { {"WGS1",64}, {"WGS2",4}, {"WPT",2} } },
}
},
}
@@ -153,7 +156,7 @@ const Database::DatabaseEntry Database::XgerDouble = {
{ "Oland", { {"WGS1",128}, {"WGS2",1}, {"WPT",2} } },
{ "Pitcairn", { {"WGS1",64}, {"WGS2",1}, {"WPT",1} } },
{ "Tahiti", { {"WGS1",64}, {"WGS2",2}, {"WPT",1} } },
- { "default", { {"WGS1",32}, {"WGS2",1}, {"WPT",1} } },
+ { "default", { {"WGS1",64}, {"WGS2",2}, {"WPT",1} } },
}
},
{ // ARM GPUs
@@ -166,7 +169,7 @@ const Database::DatabaseEntry Database::XgerDouble = {
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"WGS1",512}, {"WGS2",16}, {"WPT",1} } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"WGS1",512}, {"WGS2",8}, {"WPT",2} } },
- { "default", { {"WGS1",512}, {"WGS2",8}, {"WPT",1} } },
+ { "default", { {"WGS1",512}, {"WGS2",8}, {"WPT",2} } },
}
},
{ // NVIDIA GPUs
@@ -178,12 +181,12 @@ const Database::DatabaseEntry Database::XgerDouble = {
{ "GeForce GTX 680", { {"WGS1",128}, {"WGS2",4}, {"WPT",2} } },
{ "GeForce GTX 750", { {"WGS1",256}, {"WGS2",2}, {"WPT",2} } },
{ "GeForce GTX TITAN", { {"WGS1",16}, {"WGS2",8}, {"WPT",2} } },
- { "default", { {"WGS1",16}, {"WGS2",2}, {"WPT",1} } },
+ { "default", { {"WGS1",256}, {"WGS2",2}, {"WPT",2} } },
}
},
{ // Default
kDeviceTypeAll, "default", {
- { "default", { {"WGS1",16}, {"WGS2",1}, {"WPT",1} } },
+ { "default", { {"WGS1",128}, {"WGS2",1}, {"WPT",2} } },
}
},
}
@@ -200,7 +203,7 @@ const Database::DatabaseEntry Database::XgerComplexDouble = {
{ "Oland", { {"WGS1",16}, {"WGS2",16}, {"WPT",2} } },
{ "Pitcairn", { {"WGS1",64}, {"WGS2",4}, {"WPT",1} } },
{ "Tahiti", { {"WGS1",32}, {"WGS2",4}, {"WPT",1} } },
- { "default", { {"WGS1",16}, {"WGS2",1}, {"WPT",1} } },
+ { "default", { {"WGS1",32}, {"WGS2",4}, {"WPT",1} } },
}
},
{ // ARM GPUs
@@ -225,12 +228,12 @@ const Database::DatabaseEntry Database::XgerComplexDouble = {
{ "GeForce GTX 680", { {"WGS1",8}, {"WGS2",16}, {"WPT",1} } },
{ "GeForce GTX 750", { {"WGS1",8}, {"WGS2",32}, {"WPT",4} } },
{ "GeForce GTX TITAN", { {"WGS1",32}, {"WGS2",4}, {"WPT",2} } },
- { "default", { {"WGS1",8}, {"WGS2",2}, {"WPT",1} } },
+ { "default", { {"WGS1",16}, {"WGS2",8}, {"WPT",2} } },
}
},
{ // Default
kDeviceTypeAll, "default", {
- { "default", { {"WGS1",8}, {"WGS2",1}, {"WPT",1} } },
+ { "default", { {"WGS1",64}, {"WGS2",2}, {"WPT",2} } },
}
},
}