diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2017-12-23 13:55:22 +0100 |
---|---|---|
committer | Cedric Nugteren <web@cedricnugteren.nl> | 2017-12-23 13:55:22 +0100 |
commit | b1f52f130c4f9e4346579003b2786aa2e082f234 (patch) | |
tree | 1c0807409adf163e8abe1cba52509873a6c65f48 | |
parent | aa7db4f987360fe1956add9391c6e81aa61b75f3 (diff) |
Updated the database to use the new TRSV and Invert tuners
-rw-r--r-- | CHANGELOG | 1 |
-rw-r--r-- | CMakeLists.txt | 2 |
-rw-r--r-- | src/database/database.cpp | 8 |
-rw-r--r-- | src/database/kernels/invert.hpp | 78 |
-rw-r--r-- | src/database/kernels/invert/invert.hpp | 14 |
-rw-r--r-- | src/database/kernels/invert/invert_16.hpp | 34 |
-rw-r--r-- | src/database/kernels/invert/invert_32.hpp | 34 |
-rw-r--r-- | src/database/kernels/invert/invert_3232.hpp | 34 |
-rw-r--r-- | src/database/kernels/invert/invert_64.hpp | 26 |
-rw-r--r-- | src/database/kernels/invert/invert_6464.hpp | 26 |
-rw-r--r-- | src/database/kernels/trsv_routine/trsv_routine.hpp | 14 |
-rw-r--r-- | src/database/kernels/trsv_routine/trsv_routine_16.hpp | 26 |
-rw-r--r-- | src/database/kernels/trsv_routine/trsv_routine_32.hpp | 34 |
-rw-r--r-- | src/database/kernels/trsv_routine/trsv_routine_3232.hpp | 34 |
-rw-r--r-- | src/database/kernels/trsv_routine/trsv_routine_64.hpp | 26 |
-rw-r--r-- | src/database/kernels/trsv_routine/trsv_routine_6464.hpp | 26 |
-rw-r--r-- | src/database/kernels/xtrsv.hpp | 78 |
-rw-r--r-- | src/routines/level2/xgemv.cpp | 2 |
-rw-r--r-- | src/tuning/routines/xtrsv.cpp | 2 |
19 files changed, 336 insertions, 163 deletions
@@ -4,6 +4,7 @@ Development (next version) - Made it possible to override the tuning parameters in the clients straight from JSON tuning files - Added OpenCL pre-processor to unroll loops and perform array-to-register promotions for compilers which don't do this themselves (ARM Mali) - greatly improves performance on these platforms +- Added first tuners for the TRSV (block size) and TRSM (invert kernel) routines - Various minor fixes and enhancements - Added tuned parameters for various devices (see README) diff --git a/CMakeLists.txt b/CMakeLists.txt index f83ba33c..63ab8e79 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -195,7 +195,7 @@ endif() set(KERNELS copy_fast copy_pad transpose_fast transpose_pad xaxpy xdot xger xgemm xgemm_direct xgemv invert) set(DATABASES copy pad padtranspose transpose xaxpy xdot - xgemm xgemm_direct xgemv xgemv_fast xgemv_fast_rot xger + xgemm xgemm_direct xgemv xgemv_fast xgemv_fast_rot xger invert gemm_routine trsv_routine) set(ROUTINE_TUNERS xgemm xtrsv) set(LEVEL1_ROUTINES xswap xscal xcopy xaxpy xdot xdotu xdotc xnrm2 xasum xamax) diff --git a/src/database/database.cpp b/src/database/database.cpp index 2fa86151..56c93f18 100644 --- a/src/database/database.cpp +++ b/src/database/database.cpp @@ -29,11 +29,11 @@ #include "database/kernels/pad/pad.hpp" #include "database/kernels/transpose/transpose.hpp" #include "database/kernels/padtranspose/padtranspose.hpp" +#include "database/kernels/invert/invert.hpp" #include "database/kernels/gemm_routine/gemm_routine.hpp" +#include "database/kernels/trsv_routine/trsv_routine.hpp" -#include "database/kernels/xtrsv.hpp" -#include "database/kernels/invert.hpp" #include "database/apple_cpu_fallback.hpp" namespace clblast { @@ -47,7 +47,6 @@ const std::vector<database::DatabaseEntry> Database::database = std::vector<data database::XgemvFastHalf, database::XgemvFastSingle, database::XgemvFastDouble, database::XgemvFastComplexSingle, database::XgemvFastComplexDouble, 
database::XgemvFastRotHalf, database::XgemvFastRotSingle, database::XgemvFastRotDouble, database::XgemvFastRotComplexSingle, database::XgemvFastRotComplexDouble, database::XgerHalf, database::XgerSingle, database::XgerDouble, database::XgerComplexSingle, database::XgerComplexDouble, - database::XtrsvHalf, database::XtrsvSingle, database::XtrsvDouble, database::XtrsvComplexSingle, database::XtrsvComplexDouble, database::XgemmHalf, database::XgemmSingle, database::XgemmDouble, database::XgemmComplexSingle, database::XgemmComplexDouble, database::XgemmDirectHalf, database::XgemmDirectSingle, database::XgemmDirectDouble, database::XgemmDirectComplexSingle, database::XgemmDirectComplexDouble, database::CopyHalf, database::CopySingle, database::CopyDouble, database::CopyComplexSingle, database::CopyComplexDouble, @@ -55,7 +54,8 @@ const std::vector<database::DatabaseEntry> Database::database = std::vector<data database::TransposeHalf, database::TransposeSingle, database::TransposeDouble, database::TransposeComplexSingle, database::TransposeComplexDouble, database::PadtransposeHalf, database::PadtransposeSingle, database::PadtransposeDouble, database::PadtransposeComplexSingle, database::PadtransposeComplexDouble, database::InvertHalf, database::InvertSingle, database::InvertDouble, database::InvertComplexSingle, database::InvertComplexDouble, - database::GemmRoutineHalf, database::GemmRoutineSingle, database::GemmRoutineDouble, database::GemmRoutineComplexSingle, database::GemmRoutineComplexDouble + database::GemmRoutineHalf, database::GemmRoutineSingle, database::GemmRoutineDouble, database::GemmRoutineComplexSingle, database::GemmRoutineComplexDouble, + database::TrsvRoutineHalf, database::TrsvRoutineSingle, database::TrsvRoutineDouble, database::TrsvRoutineComplexSingle, database::TrsvRoutineComplexDouble }; const std::vector<database::DatabaseEntry> Database::apple_cpu_fallback = std::vector<database::DatabaseEntry>{ database::XaxpyApple, database::XdotApple, diff 
--git a/src/database/kernels/invert.hpp b/src/database/kernels/invert.hpp deleted file mode 100644 index b7464382..00000000 --- a/src/database/kernels/invert.hpp +++ /dev/null @@ -1,78 +0,0 @@ - -// ================================================================================================= -// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This -// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- -// width of 100 characters per line. -// -// Author(s): -// Cedric Nugteren <www.cedricnugteren.nl> -// -// Tuning parameters for the diagonal matrix inversion kernels -// -// ================================================================================================= - -namespace clblast { -namespace database { -// ================================================================================================= - -const DatabaseEntry InvertHalf = { - "Invert", Precision::kHalf, {"INTERNAL_BLOCK_SIZE"}, { - { // Default - kDeviceTypeAll, "default", { - { "default", { { kDeviceNameDefault, Params{ 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } }, - } - }, - } -}; - -// ================================================================================================= - -const DatabaseEntry InvertSingle = { - "Invert", Precision::kSingle, {"INTERNAL_BLOCK_SIZE"}, { - { // Default - kDeviceTypeAll, "default", { - { "default", { { kDeviceNameDefault, Params{ 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } }, - } - }, - } -}; - -// ================================================================================================= - -const DatabaseEntry InvertComplexSingle = { - "Invert", Precision::kComplexSingle, {"INTERNAL_BLOCK_SIZE"}, { - { // Default - kDeviceTypeAll, "default", { - { "default", { { kDeviceNameDefault, Params{ 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } }, - } - }, - } -}; - -// 
================================================================================================= - -const DatabaseEntry InvertDouble = { - "Invert", Precision::kDouble, {"INTERNAL_BLOCK_SIZE"}, { - { // Default - kDeviceTypeAll, "default", { - { "default", { { kDeviceNameDefault, Params{ 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } }, - } - }, - } -}; - -// ================================================================================================= - -const DatabaseEntry InvertComplexDouble = { - "Invert", Precision::kComplexDouble, {"INTERNAL_BLOCK_SIZE"}, { - { // Default - kDeviceTypeAll, "default", { - { "default", { { kDeviceNameDefault, Params{ 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } }, - } - }, - } -}; - -// ================================================================================================= -} // namespace database -} // namespace clblast diff --git a/src/database/kernels/invert/invert.hpp b/src/database/kernels/invert/invert.hpp new file mode 100644 index 00000000..9b7c2d30 --- /dev/null +++ b/src/database/kernels/invert/invert.hpp @@ -0,0 +1,14 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Invert' kernels. 
+// +// ================================================================================================= + +#include "database/kernels/invert/invert_16.hpp" +#include "database/kernels/invert/invert_32.hpp" +#include "database/kernels/invert/invert_3232.hpp" +#include "database/kernels/invert/invert_64.hpp" +#include "database/kernels/invert/invert_6464.hpp" diff --git a/src/database/kernels/invert/invert_16.hpp b/src/database/kernels/invert/invert_16.hpp new file mode 100644 index 00000000..e3941370 --- /dev/null +++ b/src/database/kernels/invert/invert_16.hpp @@ -0,0 +1,34 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Invert16' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry InvertHalf = { + "Invert", Precision::kHalf, {"INTERNAL_BLOCK_SIZE", "LOCALPAD", "TMMWGSX", "TMMWGSY"}, { + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/invert/invert_32.hpp b/src/database/kernels/invert/invert_32.hpp new file mode 100644 index 00000000..ca07e947 --- /dev/null +++ b/src/database/kernels/invert/invert_32.hpp @@ -0,0 +1,34 @@ + +// 
================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Invert32' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry InvertSingle = { + "Invert", Precision::kSingle, {"INTERNAL_BLOCK_SIZE", "LOCALPAD", "TMMWGSX", "TMMWGSY"}, { + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/invert/invert_3232.hpp b/src/database/kernels/invert/invert_3232.hpp new file mode 100644 index 00000000..f01b3c7f --- /dev/null +++ b/src/database/kernels/invert/invert_3232.hpp @@ -0,0 +1,34 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Invert3232' kernels. 
+// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry InvertComplexSingle = { + "Invert", Precision::kComplexSingle, {"INTERNAL_BLOCK_SIZE", "LOCALPAD", "TMMWGSX", "TMMWGSY"}, { + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/invert/invert_64.hpp b/src/database/kernels/invert/invert_64.hpp new file mode 100644 index 00000000..e73120ca --- /dev/null +++ b/src/database/kernels/invert/invert_64.hpp @@ -0,0 +1,26 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Invert64' kernels. 
+// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry InvertDouble = { + "Invert", Precision::kDouble, {"INTERNAL_BLOCK_SIZE", "LOCALPAD", "TMMWGSX", "TMMWGSY"}, { + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/invert/invert_6464.hpp b/src/database/kernels/invert/invert_6464.hpp new file mode 100644 index 00000000..184b956a --- /dev/null +++ b/src/database/kernels/invert/invert_6464.hpp @@ -0,0 +1,26 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Invert6464' kernels. 
+// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry InvertComplexDouble = { + "Invert", Precision::kComplexDouble, {"INTERNAL_BLOCK_SIZE", "LOCALPAD", "TMMWGSX", "TMMWGSY"}, { + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/trsv_routine/trsv_routine.hpp b/src/database/kernels/trsv_routine/trsv_routine.hpp new file mode 100644 index 00000000..c4659ad1 --- /dev/null +++ b/src/database/kernels/trsv_routine/trsv_routine.hpp @@ -0,0 +1,14 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Trsv_Routine' kernels. +// +// ================================================================================================= + +#include "database/kernels/trsv_routine/trsv_routine_16.hpp" +#include "database/kernels/trsv_routine/trsv_routine_32.hpp" +#include "database/kernels/trsv_routine/trsv_routine_3232.hpp" +#include "database/kernels/trsv_routine/trsv_routine_64.hpp" +#include "database/kernels/trsv_routine/trsv_routine_6464.hpp" diff --git a/src/database/kernels/trsv_routine/trsv_routine_16.hpp b/src/database/kernels/trsv_routine/trsv_routine_16.hpp new file mode 100644 index 00000000..c6d5d876 --- /dev/null +++ b/src/database/kernels/trsv_routine/trsv_routine_16.hpp @@ -0,0 +1,26 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. 
The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Trsv_Routine16' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry TrsvRoutineHalf = { + "TrsvRoutine", Precision::kHalf, {"TRSV_BLOCK_SIZE"}, { + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/trsv_routine/trsv_routine_32.hpp b/src/database/kernels/trsv_routine/trsv_routine_32.hpp new file mode 100644 index 00000000..7912faf4 --- /dev/null +++ b/src/database/kernels/trsv_routine/trsv_routine_32.hpp @@ -0,0 +1,34 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Trsv_Routine32' kernels. 
+// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry TrsvRoutineSingle = { + "TrsvRoutine", Precision::kSingle, {"TRSV_BLOCK_SIZE"}, { + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/trsv_routine/trsv_routine_3232.hpp b/src/database/kernels/trsv_routine/trsv_routine_3232.hpp new file mode 100644 index 00000000..4c7f4c88 --- /dev/null +++ b/src/database/kernels/trsv_routine/trsv_routine_3232.hpp @@ -0,0 +1,34 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Trsv_Routine3232' kernels. 
+// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry TrsvRoutineComplexSingle = { + "TrsvRoutine", Precision::kComplexSingle, {"TRSV_BLOCK_SIZE"}, { + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/trsv_routine/trsv_routine_64.hpp b/src/database/kernels/trsv_routine/trsv_routine_64.hpp new file mode 100644 index 00000000..e1897b79 --- /dev/null +++ b/src/database/kernels/trsv_routine/trsv_routine_64.hpp @@ -0,0 +1,26 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Trsv_Routine64' kernels. 
+// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry TrsvRoutineDouble = { + "TrsvRoutine", Precision::kDouble, {"TRSV_BLOCK_SIZE"}, { + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/trsv_routine/trsv_routine_6464.hpp b/src/database/kernels/trsv_routine/trsv_routine_6464.hpp new file mode 100644 index 00000000..082d3a8e --- /dev/null +++ b/src/database/kernels/trsv_routine/trsv_routine_6464.hpp @@ -0,0 +1,26 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Trsv_Routine6464' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry TrsvRoutineComplexDouble = { + "TrsvRoutine", Precision::kComplexDouble, {"TRSV_BLOCK_SIZE"}, { + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xtrsv.hpp b/src/database/kernels/xtrsv.hpp deleted file mode 100644 index 2d6afbea..00000000 --- a/src/database/kernels/xtrsv.hpp +++ /dev/null @@ -1,78 +0,0 @@ - -// ================================================================================================= -// This file is part of the CLBlast project. 
The project is licensed under Apache Version 2.0. This -// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- -// width of 100 characters per line. -// -// Author(s): -// Cedric Nugteren <www.cedricnugteren.nl> -// -// This file populates the database with best-found tuning parameters for the 'Xtrsv' kernels. -// -// ================================================================================================= - -namespace clblast { -namespace database { -// ================================================================================================= - -const DatabaseEntry XtrsvHalf = { - "Xtrsv", Precision::kHalf, {"TRSV_BLOCK_SIZE"}, { - { // Default - kDeviceTypeAll, "default", { - { "default", { { kDeviceNameDefault, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } }, - } - }, - } -}; - -// ================================================================================================= - -const DatabaseEntry XtrsvSingle = { - "Xtrsv", Precision::kSingle, {"TRSV_BLOCK_SIZE"}, { - { // Default - kDeviceTypeAll, "default", { - { "default", { { kDeviceNameDefault, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } }, - } - }, - } -}; - -// ================================================================================================= - -const DatabaseEntry XtrsvComplexSingle = { - "Xtrsv", Precision::kComplexSingle, {"TRSV_BLOCK_SIZE"}, { - { // Default - kDeviceTypeAll, "default", { - { "default", { { kDeviceNameDefault, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } }, - } - }, - } -}; - -// ================================================================================================= - -const DatabaseEntry XtrsvDouble = { - "Xtrsv", Precision::kDouble, {"TRSV_BLOCK_SIZE"}, { - { // Default - kDeviceTypeAll, "default", { - { "default", { { kDeviceNameDefault, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } }, - } - }, - } -}; - -// 
================================================================================================= - -const DatabaseEntry XtrsvComplexDouble = { - "Xtrsv", Precision::kComplexDouble, {"TRSV_BLOCK_SIZE"}, { - { // Default - kDeviceTypeAll, "default", { - { "default", { { kDeviceNameDefault, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } }, - } - }, - } -}; - -// ================================================================================================= -} // namespace database -} // namespace clblast diff --git a/src/routines/level2/xgemv.cpp b/src/routines/level2/xgemv.cpp index b7e8081b..63dab9f7 100644 --- a/src/routines/level2/xgemv.cpp +++ b/src/routines/level2/xgemv.cpp @@ -22,7 +22,7 @@ namespace clblast { // Constructor: forwards to base class constructor template <typename T> Xgemv<T>::Xgemv(Queue &queue, EventPointer event, const std::string &name): - Routine(queue, event, name, {"Xgemv", "XgemvFast", "XgemvFastRot", "Xtrsv"}, PrecisionValue<T>(), {}, { + Routine(queue, event, name, {"Xgemv", "XgemvFast", "XgemvFastRot", "TrsvRoutine"}, PrecisionValue<T>(), {}, { #include "../../kernels/level2/xgemv.opencl" #include "../../kernels/level2/xgemv_fast.opencl" #include "../../kernels/level2/xtrsv.opencl" diff --git a/src/tuning/routines/xtrsv.cpp b/src/tuning/routines/xtrsv.cpp index 9e8f26fa..29db0cd0 100644 --- a/src/tuning/routines/xtrsv.cpp +++ b/src/tuning/routines/xtrsv.cpp @@ -59,7 +59,7 @@ void TuneXtrsv(int argc, char* argv[]) { // Values for the block size const auto from = size_t{8}; - const auto to = size_t{64 + 1}; + const auto to = size_t{32 + 1}; const auto step = size_t{8}; // OpenCL initialisation |