summary refs log tree commit diff
path: root/include
diff options
context:
space:
mode:
author CNugteren <web@cedricnugteren.nl> 2015-06-14 11:15:53 +0200
committer CNugteren <web@cedricnugteren.nl> 2015-06-14 11:15:53 +0200
commit 294a3e3d410c87ffcc7fc550e09b6d45c71a0af8 (patch)
tree d68a45bb8312aabba9589bb1c51b2c6ffe0dc504 /include
parent ab0064dab76c83ee9820acb62fa914c493c2563d (diff)
Split the three variations of the GEMV kernel for maximal tuning freedom
Diffstat (limited to 'include')
-rw-r--r-- include/internal/database/xgemv.h 44
-rw-r--r-- include/internal/tuning.h 8
2 files changed, 29 insertions, 23 deletions
diff --git a/include/internal/database/xgemv.h b/include/internal/database/xgemv.h
index 37d33487..48ff42c8 100644
--- a/include/internal/database/xgemv.h
+++ b/include/internal/database/xgemv.h
@@ -18,24 +18,24 @@ const Database::DatabaseEntry Database::XgemvSingle = {
"Xgemv", Precision::kSingle, {
{ // NVIDIA GPUs
CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", {
- { "GeForce GTX 480", { {"WGS",64}, {"WPT",1}, {"VW",1} } },
- { "Tesla K20m", { {"WGS",64}, {"WPT",1}, {"VW",1} } },
- { "Tesla K40m", { {"WGS",64}, {"WPT",1}, {"VW",1} } },
+ { "GeForce GTX 480", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } },
+ { "Tesla K20m", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } },
+ { "Tesla K40m", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } },
}
},
{ // AMD GPUs
CL_DEVICE_TYPE_GPU, "AMD", {
- { "Tahiti", { {"WGS",64}, {"WPT",1}, {"VW",1} } },
+ { "Tahiti", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } },
}
},
{ // Intel GPUs
CL_DEVICE_TYPE_GPU, "Intel", {
- { "Iris", { {"WGS",128}, {"WPT",4}, {"VW",4} } },
+ { "Iris", { {"WGS1",256}, {"WPT1",2}, {"WGS2",64}, {"WPT2",4}, {"VW2",4}, {"WGS3",256}, {"WPT3",2}, {"VW3",8} } },
}
},
{ // Default
CL_DEVICE_TYPE_ALL, kDefault, {
- { kDefault, { {"WGS",64}, {"WPT",1}, {"VW",1} } },
+ { kDefault, { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } },
}
},
}
@@ -47,14 +47,14 @@ const Database::DatabaseEntry Database::XgemvDouble = {
"Xgemv", Precision::kDouble, {
{ // NVIDIA GPUs
CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", {
- { "GeForce GTX 480", { {"WGS",64}, {"WPT",1}, {"VW",1} } },
- { "Tesla K20m", { {"WGS",64}, {"WPT",1}, {"VW",1} } },
- { "Tesla K40m", { {"WGS",64}, {"WPT",1}, {"VW",1} } },
+ { "GeForce GTX 480", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } },
+ { "Tesla K20m", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } },
+ { "Tesla K40m", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } },
}
},
{ // AMD GPUs
CL_DEVICE_TYPE_GPU, "AMD", {
- { "Tahiti", { {"WGS",64}, {"WPT",1}, {"VW",1} } },
+ { "Tahiti", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } },
}
},
{ // Intel GPUs
@@ -63,7 +63,7 @@ const Database::DatabaseEntry Database::XgemvDouble = {
},
{ // Default
CL_DEVICE_TYPE_ALL, kDefault, {
- { kDefault, { {"WGS",64}, {"WPT",1}, {"VW",1} } },
+ { kDefault, { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } },
}
},
}
@@ -74,24 +74,24 @@ const Database::DatabaseEntry Database::XgemvComplexSingle = {
"Xgemv", Precision::kComplexSingle, {
{ // NVIDIA GPUs
CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", {
- { "GeForce GTX 480", { {"WGS",64}, {"WPT",1}, {"VW",1} } },
- { "Tesla K20m", { {"WGS",64}, {"WPT",1}, {"VW",1} } },
- { "Tesla K40m", { {"WGS",64}, {"WPT",1}, {"VW",1} } },
+ { "GeForce GTX 480", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } },
+ { "Tesla K20m", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } },
+ { "Tesla K40m", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } },
}
},
{ // AMD GPUs
CL_DEVICE_TYPE_GPU, "AMD", {
- { "Tahiti", { {"WGS",64}, {"WPT",1}, {"VW",1} } },
+ { "Tahiti", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } },
}
},
{ // Intel GPUs
CL_DEVICE_TYPE_GPU, "Intel", {
- { "Iris", { {"WGS",64}, {"WPT",1}, {"VW",1} } },
+ { "Iris", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } },
}
},
{ // Default
CL_DEVICE_TYPE_ALL, kDefault, {
- { kDefault, { {"WGS",64}, {"WPT",1}, {"VW",1} } },
+ { kDefault, { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } },
}
},
}
@@ -103,14 +103,14 @@ const Database::DatabaseEntry Database::XgemvComplexDouble = {
"Xgemv", Precision::kComplexDouble, {
{ // NVIDIA GPUs
CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", {
- { "GeForce GTX 480", { {"WGS",64}, {"WPT",1}, {"VW",1} } },
- { "Tesla K20m", { {"WGS",64}, {"WPT",1}, {"VW",1} } },
- { "Tesla K40m", { {"WGS",64}, {"WPT",1}, {"VW",1} } },
+ { "GeForce GTX 480", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } },
+ { "Tesla K20m", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } },
+ { "Tesla K40m", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } },
}
},
{ // AMD GPUs
CL_DEVICE_TYPE_GPU, "AMD", {
- { "Tahiti", { {"WGS",64}, {"WPT",1}, {"VW",1} } },
+ { "Tahiti", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } },
}
},
{ // Intel GPUs
@@ -119,7 +119,7 @@ const Database::DatabaseEntry Database::XgemvComplexDouble = {
},
{ // Default
CL_DEVICE_TYPE_ALL, kDefault, {
- { kDefault, { {"WGS",64}, {"WPT",1}, {"VW",1} } },
+ { kDefault, { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } },
}
},
}
diff --git a/include/internal/tuning.h b/include/internal/tuning.h
index 6ddf4b3a..d0cf6b5d 100644
--- a/include/internal/tuning.h
+++ b/include/internal/tuning.h
@@ -34,13 +34,19 @@ using Tuner3 = std::function<void(const Arguments<T>&,
const std::vector<T>&, const std::vector<T>&, std::vector<T>&,
cltune::Tuner&)>;
+// As above, but now with an additional ID for the variation
+template <typename T>
+using Tuner3V = std::function<void(const Arguments<T>&, const size_t,
+ const std::vector<T>&, const std::vector<T>&, std::vector<T>&,
+ cltune::Tuner&)>;
+
// Tuner for vector-vector input
template <typename T>
void TunerXY(int argc, char* argv[], const Tuner2<T> &tune_function);
// Tuner for matrix-vector-vector input
template <typename T>
-void TunerAXY(int argc, char* argv[], const Tuner3<T> &tune_function);
+void TunerAXY(int argc, char* argv[], const size_t num_variations, const Tuner3V<T> &tune_function);
// Tuner for matrix-matrix input
template <typename T>