diff options
author | CNugteren <web@cedricnugteren.nl> | 2015-06-14 11:15:53 +0200 |
---|---|---|
committer | CNugteren <web@cedricnugteren.nl> | 2015-06-14 11:15:53 +0200 |
commit | 294a3e3d410c87ffcc7fc550e09b6d45c71a0af8 (patch) | |
tree | d68a45bb8312aabba9589bb1c51b2c6ffe0dc504 /include | |
parent | ab0064dab76c83ee9820acb62fa914c493c2563d (diff) |
Split the three variations of the GEMV kernel for maximal tuning freedom
Diffstat (limited to 'include')
-rw-r--r-- | include/internal/database/xgemv.h | 44 | ||||
-rw-r--r-- | include/internal/tuning.h | 8 |
2 files changed, 29 insertions, 23 deletions
diff --git a/include/internal/database/xgemv.h b/include/internal/database/xgemv.h index 37d33487..48ff42c8 100644 --- a/include/internal/database/xgemv.h +++ b/include/internal/database/xgemv.h @@ -18,24 +18,24 @@ const Database::DatabaseEntry Database::XgemvSingle = { "Xgemv", Precision::kSingle, { { // NVIDIA GPUs CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", { - { "GeForce GTX 480", { {"WGS",64}, {"WPT",1}, {"VW",1} } }, - { "Tesla K20m", { {"WGS",64}, {"WPT",1}, {"VW",1} } }, - { "Tesla K40m", { {"WGS",64}, {"WPT",1}, {"VW",1} } }, + { "GeForce GTX 480", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, + { "Tesla K20m", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, + { "Tesla K40m", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, } }, { // AMD GPUs CL_DEVICE_TYPE_GPU, "AMD", { - { "Tahiti", { {"WGS",64}, {"WPT",1}, {"VW",1} } }, + { "Tahiti", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, } }, { // Intel GPUs CL_DEVICE_TYPE_GPU, "Intel", { - { "Iris", { {"WGS",128}, {"WPT",4}, {"VW",4} } }, + { "Iris", { {"WGS1",256}, {"WPT1",2}, {"WGS2",64}, {"WPT2",4}, {"VW2",4}, {"WGS3",256}, {"WPT3",2}, {"VW3",8} } }, } }, { // Default CL_DEVICE_TYPE_ALL, kDefault, { - { kDefault, { {"WGS",64}, {"WPT",1}, {"VW",1} } }, + { kDefault, { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, } }, } @@ -47,14 +47,14 @@ const Database::DatabaseEntry Database::XgemvDouble = { "Xgemv", Precision::kDouble, { { // NVIDIA GPUs CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", { - { "GeForce GTX 480", { {"WGS",64}, {"WPT",1}, {"VW",1} } }, - { "Tesla K20m", { {"WGS",64}, {"WPT",1}, {"VW",1} } }, - { "Tesla K40m", { {"WGS",64}, {"WPT",1}, {"VW",1} } }, + { "GeForce GTX 480", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, + { "Tesla K20m", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, + { "Tesla K40m", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, } }, { // AMD GPUs CL_DEVICE_TYPE_GPU, "AMD", { - { "Tahiti", { {"WGS",64}, {"WPT",1}, {"VW",1} } }, + { "Tahiti", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, } }, { // Intel GPUs @@ -63,7 +63,7 @@ const Database::DatabaseEntry Database::XgemvDouble = { }, { // Default CL_DEVICE_TYPE_ALL, kDefault, { - { kDefault, { {"WGS",64}, {"WPT",1}, {"VW",1} } }, + { kDefault, { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, } }, } @@ -74,24 +74,24 @@ const Database::DatabaseEntry Database::XgemvComplexSingle = { "Xgemv", Precision::kComplexSingle, { { // NVIDIA GPUs CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", { - { "GeForce GTX 480", { {"WGS",64}, {"WPT",1}, {"VW",1} } }, - { "Tesla K20m", { {"WGS",64}, {"WPT",1}, {"VW",1} } }, - { "Tesla K40m", { {"WGS",64}, {"WPT",1}, {"VW",1} } }, + { "GeForce GTX 480", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, + { "Tesla K20m", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, + { "Tesla K40m", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, } }, { // AMD GPUs CL_DEVICE_TYPE_GPU, "AMD", { - { "Tahiti", { {"WGS",64}, {"WPT",1}, {"VW",1} } }, + { "Tahiti", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, } }, { // Intel GPUs CL_DEVICE_TYPE_GPU, "Intel", { - { "Iris", { {"WGS",64}, {"WPT",1}, {"VW",1} } }, + { "Iris", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, } }, { // Default CL_DEVICE_TYPE_ALL, kDefault, { - { kDefault, { {"WGS",64}, {"WPT",1}, {"VW",1} } }, + { kDefault, { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, } }, } @@ -103,14 +103,14 @@ const Database::DatabaseEntry Database::XgemvComplexDouble = { "Xgemv", Precision::kComplexDouble, { { // NVIDIA GPUs CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", { - { "GeForce GTX 480", { {"WGS",64}, {"WPT",1}, {"VW",1} } }, - { "Tesla K20m", { {"WGS",64}, {"WPT",1}, {"VW",1} } }, - { "Tesla K40m", { {"WGS",64}, {"WPT",1}, {"VW",1} } }, + { "GeForce GTX 480", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, + { "Tesla K20m", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, + { "Tesla K40m", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, } }, { // AMD GPUs CL_DEVICE_TYPE_GPU, "AMD", { - { "Tahiti", { {"WGS",64}, {"WPT",1}, {"VW",1} } }, + { "Tahiti", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, } }, { // Intel GPUs @@ -119,7 +119,7 @@ const Database::DatabaseEntry Database::XgemvComplexDouble = { }, { // Default CL_DEVICE_TYPE_ALL, kDefault, { - { kDefault, { {"WGS",64}, {"WPT",1}, {"VW",1} } }, + { kDefault, { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, } }, } diff --git a/include/internal/tuning.h b/include/internal/tuning.h index 6ddf4b3a..d0cf6b5d 100644 --- a/include/internal/tuning.h +++ b/include/internal/tuning.h @@ -34,13 +34,19 @@ using Tuner3 = std::function<void(const Arguments<T>&, const std::vector<T>&, const std::vector<T>&, std::vector<T>&, cltune::Tuner&)>; +// As above, but now with an additional ID for the variation +template <typename T> +using Tuner3V = std::function<void(const Arguments<T>&, const size_t, + const std::vector<T>&, const std::vector<T>&, std::vector<T>&, + cltune::Tuner&)>; + // Tuner for vector-vector input template <typename T> void TunerXY(int argc, char* argv[], const Tuner2<T> &tune_function); // Tuner for matrix-vector-vector input template <typename T> -void TunerAXY(int argc, char* argv[], const Tuner3<T> &tune_function); +void TunerAXY(int argc, char* argv[], const size_t num_variations, const Tuner3V<T> &tune_function); // Tuner for matrix-matrix input template <typename T> |