diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2017-09-16 20:37:09 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-09-16 20:37:09 +0200 |
commit | 7d0ef8e10d05ee3a18360295c021ab6a6ef32c2d (patch) | |
tree | 55d795f06769134601f017f50d505a6c8904d398 /src/database/kernels | |
parent | bb947890dec90712c92028c20234eafd48e6fa3e (diff) | |
parent | bcf39eb79a8252b9f9b0c31311c7951abc8520ee (diff) |
Merge pull request #191 from CNugteren/database_improvements
Database improvements
Diffstat (limited to 'src/database/kernels')
86 files changed, 6949 insertions, 3802 deletions
diff --git a/src/database/kernels/copy.hpp b/src/database/kernels/copy.hpp deleted file mode 100644 index e5defb32..00000000 --- a/src/database/kernels/copy.hpp +++ /dev/null @@ -1,354 +0,0 @@ - -// ================================================================================================= -// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This -// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- -// width of 100 characters per line. -// -// Author(s): -// Database generator <database.py> -// -// This file populates the database with best-found tuning parameters for the 'Copy' kernels. -// -// ================================================================================================= - -namespace clblast { -namespace database { -// ================================================================================================= - -const Database::DatabaseEntry CopyHalf = { - "Copy", Precision::kHalf, {"COPY_DIMX", "COPY_DIMY", "COPY_VW", "COPY_WPT"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "Ellesmere", { 16, 8, 4, 4 } }, - { "default", { 16, 8, 4, 4 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 8, 16, 8, 4 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 8, 32, 4, 8 } }, - { "default", { 8, 32, 4, 8 } }, - } - }, - { // QUALCOMM GPUs - kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 32, 8, 8, 1 } }, - { "default", { 32, 8, 8, 1 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 16, 8, 4, 4 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry CopySingle = { - "Copy", Precision::kSingle, {"COPY_DIMX", "COPY_DIMY", "COPY_VW", "COPY_WPT"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 32, 8, 4, 1 } }, - { "ATI Radeon HD 6750M", { 16, 8, 2, 1 } }, - { "Ellesmere", { 8, 8, 4, 8 } }, - { "Fiji", { 16, 16, 1, 2 } }, - { "Hawaii", { 32, 8, 2, 2 } }, - { "Oland", { 32, 8, 4, 2 } }, - { "Pitcairn", { 8, 16, 4, 1 } }, - { "Tahiti", { 32, 8, 2, 2 } }, - { "Tonga", { 32, 8, 4, 4 } }, - { "Turks", { 8, 8, 4, 2 } }, - { "default", { 8, 16, 4, 1 } }, - } - }, - { // ARM GPUs - kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 32, 8, 2, 4 } }, - { "default", { 32, 8, 2, 4 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 16, 8, 1 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 16, 8, 2 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 8, 4, 4 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 16, 8, 1 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 16, 8, 2 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 8, 8, 1 } }, - { "default", { 32, 16, 8, 2 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 530", { 8, 8, 2, 1 } }, - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 32, 16, 4, 1 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 32, 16, 4, 1 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 16, 8, 2, 1 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 16, 8, 4, 8 } }, - { "Iris", { 16, 8, 1, 2 } }, - { "Iris Pro", { 32, 8, 4, 4 } }, - { "default", { 8, 8, 2, 1 } }, - } - }, - { // Intel accelerators - kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 32, 8, 8, 1 } }, - { "default", { 32, 8, 8, 1 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 16, 8, 4, 1 } }, - { "GeForce GT 650M", { 16, 16, 4, 2 } }, - { "GeForce GTX 1070", { 8, 16, 4, 1 } }, - { "GeForce GTX 1080", { 8, 32, 4, 1 } }, - { "GeForce GTX 480", { 8, 8, 4, 1 } }, - { "GeForce GTX 670", { 16, 32, 4, 1 } }, - { "GeForce GTX 680", { 32, 16, 4, 1 } }, - { "GeForce GTX 750", { 32, 8, 2, 2 } }, - { "GeForce GTX 750 Ti", { 16, 32, 2, 2 } }, - { "GeForce GTX 980", { 32, 16, 1, 1 } }, - { "GeForce GTX TITAN", { 32, 8, 2, 4 } }, - { "GeForce GTX TITAN Black", { 8, 32, 4, 8 } }, - { "GeForce GTX TITAN X", { 32, 8, 1, 2 } }, - { "TITAN X (Pascal)", { 8, 32, 4, 1 } }, - { "Tesla K20m", { 8, 8, 4, 4 } }, - { "Tesla K40m", { 8, 8, 4, 2 } }, - { "default", { 8, 32, 4, 1 } }, - } - }, - { // QUALCOMM GPUs - kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 32, 8, 8, 1 } }, - { "default", { 32, 8, 8, 1 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 32, 8, 4, 4 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry CopyComplexSingle = { - "Copy", Precision::kComplexSingle, {"COPY_DIMX", "COPY_DIMY", "COPY_VW", "COPY_WPT"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 32, 8, 1, 1 } }, - { "ATI Radeon HD 6750M", { 8, 8, 1, 1 } }, - { "Ellesmere", { 16, 16, 1, 4 } }, - { "Fiji", { 16, 8, 1, 2 } }, - { "Hawaii", { 32, 8, 1, 2 } }, - { "Oland", { 8, 16, 1, 1 } }, - { "Pitcairn", { 8, 8, 1, 2 } }, - { "Tahiti", { 8, 8, 2, 2 } }, - { "Tonga", { 8, 32, 1, 2 } }, - { "Turks", { 32, 8, 4, 1 } }, - { "default", { 16, 8, 1, 1 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 16, 4, 2 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 16, 16, 8, 1 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 8, 4, 2 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 8, 2, 2 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 32, 4, 1 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 16, 8, 8, 1 } }, - { "default", { 32, 8, 8, 1 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 530", { 16, 8, 2, 1 } }, - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 16, 16, 2, 2 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 8, 8, 1, 1 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 8, 32, 2, 4 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 8, 8, 2, 1 } }, - { "Iris", { 16, 8, 1, 2 } }, - { "Iris Pro", { 32, 16, 1, 4 } }, - { "default", { 16, 8, 1, 2 } }, - } - }, - { // Intel accelerators - kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 32, 8, 4, 1 } }, - { "default", { 32, 8, 4, 1 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 16, 8, 1, 1 } }, - { "GeForce GTX 1070", { 16, 8, 1, 1 } }, - { "GeForce GTX 1080", { 32, 8, 1, 2 } }, - { "GeForce GTX 480", { 16, 16, 1, 1 } }, - { "GeForce GTX 670", { 16, 8, 1, 1 } }, - { "GeForce GTX 750", { 16, 8, 1, 2 } }, - { "GeForce GTX 750 Ti", { 16, 32, 1, 1 } }, - { "GeForce GTX 980", { 8, 8, 1, 1 } }, - { "GeForce GTX TITAN Black", { 16, 8, 1, 1 } }, - { "GeForce GTX TITAN X", { 16, 8, 1, 1 } }, - { "TITAN X (Pascal)", { 8, 16, 2, 1 } }, - { "Tesla K20m", { 8, 8, 1, 4 } }, - { "Tesla K40m", { 16, 8, 1, 1 } }, - { "default", { 32, 8, 1, 1 } }, - } - }, - { // QUALCOMM GPUs - kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 32, 8, 1, 1 } }, - { "default", { 32, 8, 1, 1 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 16, 8, 1, 2 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry CopyDouble = { - "Copy", Precision::kDouble, {"COPY_DIMX", "COPY_DIMY", "COPY_VW", "COPY_WPT"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 32, 8, 1, 1 } }, - { "Ellesmere", { 32, 8, 1, 4 } }, - { "Fiji", { 16, 8, 1, 2 } }, - { "Hawaii", { 32, 8, 1, 2 } }, - { "Oland", { 32, 8, 2, 8 } }, - { "Pitcairn", { 32, 8, 1, 1 } }, - { "Tahiti", { 8, 32, 2, 1 } }, - { "Tonga", { 8, 32, 2, 4 } }, - { "default", { 16, 8, 2, 1 } }, - } - }, - { // ARM GPUs - kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 16, 8, 8, 2 } }, - { "default", { 16, 8, 8, 2 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 16, 32, 8, 1 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 16, 8, 8, 1 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 16, 8, 1 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 16, 32, 2, 1 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 16, 32, 8, 1 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 16, 16, 8, 1 } }, - { "default", { 16, 8, 8, 1 } }, - } - }, - { // Intel accelerators - kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 8, 8, 8, 1 } }, - { "default", { 8, 8, 8, 1 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 32, 16, 2, 1 } }, - { "GeForce GTX 1070", { 8, 8, 4, 1 } }, - { "GeForce GTX 1080", { 8, 8, 4, 1 } }, - { "GeForce GTX 480", { 8, 8, 2, 1 } }, - { "GeForce GTX 670", { 8, 8, 2, 1 } }, - { "GeForce GTX 680", { 16, 32, 2, 1 } }, - { "GeForce GTX 750", { 8, 16, 2, 1 } }, - { "GeForce GTX 750 Ti", { 16, 8, 2, 1 } }, - { "GeForce GTX 980", { 32, 8, 2, 1 } }, - { "GeForce GTX TITAN", { 16, 32, 2, 2 } }, - { "GeForce GTX TITAN Black", { 16, 8, 2, 8 } }, - { "GeForce GTX TITAN X", { 32, 16, 1, 1 } }, - { "TITAN X (Pascal)", { 8, 8, 2, 2 } }, - { "Tesla K20m", { 8, 8, 2, 1 } }, - { "Tesla K40m", { 8, 8, 2, 2 } }, - { "default", { 32, 32, 2, 1 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 16, 8, 2, 1 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry CopyComplexDouble = { - "Copy", Precision::kComplexDouble, {"COPY_DIMX", "COPY_DIMY", "COPY_VW", "COPY_WPT"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 8, 16, 1, 1 } }, - { "Ellesmere", { 8, 32, 1, 2 } }, - { "Fiji", { 8, 16, 1, 1 } }, - { "Hawaii", { 32, 8, 2, 8 } }, - { "Oland", { 8, 16, 1, 1 } }, - { "Pitcairn", { 16, 8, 1, 1 } }, - { "Tahiti", { 8, 16, 1, 1 } }, - { "Tonga", { 16, 8, 2, 1 } }, - { "default", { 8, 16, 1, 1 } }, - } - }, - { // ARM GPUs - kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 32, 8, 1, 2 } }, - { "default", { 32, 8, 1, 2 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 8, 8, 8, 1 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 8, 8, 1 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 16, 2, 1 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 32, 8, 1 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 16, 8, 4 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 8, 8, 8, 1 } }, - { "default", { 16, 8, 8, 1 } }, - } - }, - { // Intel accelerators - kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 32, 8, 8, 1 } }, - { "default", { 32, 8, 8, 1 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 8, 8, 1, 1 } }, - { "GeForce GTX 1070", { 8, 32, 1, 4 } }, - { "GeForce GTX 1080", { 8, 8, 1, 1 } }, - { "GeForce GTX 480", { 16, 8, 1, 1 } }, - { "GeForce GTX 670", { 16, 8, 1, 1 } }, - { "GeForce GTX 680", { 8, 8, 1, 1 } }, - { "GeForce GTX 750", { 32, 8, 1, 1 } }, - { "GeForce GTX 750 Ti", { 16, 16, 1, 1 } }, - { "GeForce GTX 980", { 8, 8, 1, 1 } }, - { "GeForce GTX TITAN", { 16, 16, 1, 1 } }, - { "GeForce GTX TITAN Black", { 8, 8, 1, 2 } }, - { "GeForce GTX TITAN X", { 16, 8, 1, 1 } }, - { "TITAN X (Pascal)", { 8, 8, 1, 2 } }, - { "Tesla K20m", { 8, 8, 1, 2 } }, - { "Tesla K40m", { 8, 8, 1, 1 } }, - { "default", { 8, 8, 1, 1 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 16, 8, 1, 1 } }, - } - }, - } -}; - -// ================================================================================================= -} // namespace database -} // namespace clblast diff --git a/src/database/kernels/copy/copy.hpp b/src/database/kernels/copy/copy.hpp new file mode 100644 index 00000000..8c6e7e03 --- /dev/null +++ b/src/database/kernels/copy/copy.hpp @@ -0,0 +1,14 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Copy' kernels. +// +// ================================================================================================= + +#include "database/kernels/copy/copy_16.hpp" +#include "database/kernels/copy/copy_32.hpp" +#include "database/kernels/copy/copy_3232.hpp" +#include "database/kernels/copy/copy_64.hpp" +#include "database/kernels/copy/copy_6464.hpp" diff --git a/src/database/kernels/copy/copy_16.hpp b/src/database/kernels/copy/copy_16.hpp new file mode 100644 index 00000000..faf6d2bc --- /dev/null +++ b/src/database/kernels/copy/copy_16.hpp @@ -0,0 +1,54 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Copy16' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry CopyHalf = { + "Copy", Precision::kHalf, {"COPY_DIMX", "COPY_DIMY", "COPY_VW", "COPY_WPT"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { + { Name{"AMD Radeon RX 480 "}, Params{ 16, 8, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 8, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 16, 8, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 8, 16, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 8, 32, 4, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 32, 4, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // QUALCOMM GPUs + kDeviceTypeGPU, "QUALCOMM", { + { "default", { + { Name{"QUALCOMM Adreno(TM) "}, Params{ 32, 8, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 16, 8, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/copy/copy_32.hpp b/src/database/kernels/copy/copy_32.hpp new file mode 100644 index 00000000..28b56cae --- /dev/null +++ b/src/database/kernels/copy/copy_32.hpp @@ -0,0 +1,162 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Copy32' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry CopySingle = { + "Copy", Precision::kSingle, {"COPY_DIMX", "COPY_DIMY", "COPY_VW", "COPY_WPT"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { + { Name{"AMD Radeon RX 480 "}, Params{ 8, 8, 4, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 8, 4, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Fiji", { + { Name{"AMD Radeon R9 Fury X "}, Params{ 16, 16, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"AMD Radeon R9 M370X Compute Engine "}, Params{ 32, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 16, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Hawaii", { + { Name{"AMD Radeon R9 290X "}, Params{ 32, 8, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Oland", { + { Name{"Oland "}, Params{ 32, 8, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Pitcairn", { + { Name{"AMD Radeon R9 270X "}, Params{ 8, 16, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 16, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tahiti", { + { Name{"AMD Radeon HD 7970 "}, Params{ 32, 8, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tonga", { + { Name{"AMD Radeon R9 380 "}, Params{ 32, 8, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Turks", { + { Name{"AMD Radeon HD 6770M "}, Params{ 8, 8, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 8, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Vancouver", { + { Name{"ATI Radeon HD 6750M "}, Params{ 16, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 8, 16, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "default", { + { Name{"Mali-T628 "}, Params{ 32, 8, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "default", { + { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 32, 16, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 32, 16, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 32, 8, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz "}, Params{ 32, 16, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 32, 16, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz "}, Params{ 32, 8, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 16, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics 530 "}, Params{ 8, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 32, 16, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile "}, Params{ 32, 16, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics IvyBridge M GT2 "}, Params{ 16, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 16, 8, 4, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Iris "}, Params{ 16, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Iris Pro "}, Params{ 32, 8, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "default", { + { Name{"Intel(R) Many Integrated Core Acceleration Card "}, Params{ 32, 8, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "SM2.0", { + { Name{"GeForce GTX 480 "}, Params{ 8, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.0", { + { Name{"GRID K520 "}, Params{ 16, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GT 650M "}, Params{ 16, 16, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 670 "}, Params{ 16, 32, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 680 "}, Params{ 32, 16, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 16, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.5", { + { Name{"GeForce GTX TITAN "}, Params{ 32, 8, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX TITAN Black "}, Params{ 8, 32, 4, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Tesla K20m "}, Params{ 8, 8, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Tesla K40m "}, Params{ 8, 8, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 16, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM5.0", { + { Name{"GeForce GTX 750 "}, Params{ 32, 8, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 750 Ti "}, Params{ 16, 32, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM5.2", { + { Name{"GeForce GTX 980 "}, Params{ 32, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX TITAN X "}, Params{ 32, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM6.1", { + { Name{"GeForce GTX 1070 "}, Params{ 8, 16, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1080 "}, Params{ 8, 32, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"TITAN X (Pascal) "}, Params{ 8, 32, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 32, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 8, 32, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // QUALCOMM GPUs + kDeviceTypeGPU, "QUALCOMM", { + { "default", { + { Name{"QUALCOMM Adreno(TM) "}, Params{ 32, 8, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 32, 8, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/copy/copy_3232.hpp b/src/database/kernels/copy/copy_3232.hpp new file mode 100644 index 00000000..3c742734 --- /dev/null +++ b/src/database/kernels/copy/copy_3232.hpp @@ -0,0 +1,151 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Copy3232' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry CopyComplexSingle = { + "Copy", Precision::kComplexSingle, {"COPY_DIMX", "COPY_DIMY", "COPY_VW", "COPY_WPT"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { + { Name{"AMD Radeon RX 480 "}, Params{ 16, 16, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 16, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Fiji", { + { Name{"AMD Radeon R9 Fury X "}, Params{ 16, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"AMD Radeon R9 M370X Compute Engine "}, Params{ 32, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Hawaii", { + { Name{"AMD Radeon R9 290X "}, Params{ 32, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Oland", { + { Name{"Oland "}, Params{ 8, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Pitcairn", { + { Name{"AMD Radeon R9 270X "}, Params{ 8, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tahiti", { + { Name{"AMD Radeon HD 7970 "}, Params{ 8, 8, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 8, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tonga", { + { Name{"AMD Radeon R9 380 "}, Params{ 8, 32, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 32, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Turks", { + { Name{"AMD Radeon HD 6770M "}, Params{ 32, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Vancouver", { + { Name{"ATI Radeon HD 6750M "}, Params{ 8, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "default", { + { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 32, 16, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 16, 16, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 32, 8, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz "}, Params{ 32, 8, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 32, 32, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz "}, Params{ 16, 8, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics 530 "}, Params{ 16, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 16, 16, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile "}, Params{ 8, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics IvyBridge M GT2 "}, Params{ 8, 32, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 8, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Iris "}, Params{ 16, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Iris Pro "}, Params{ 32, 16, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "default", { + { Name{"Intel(R) Many Integrated Core Acceleration Card "}, Params{ 32, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "SM2.0", { + { Name{"GeForce GTX 480 "}, Params{ 16, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.0", { + { Name{"GRID K520 "}, Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 670 "}, Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.5", { + { Name{"GeForce GTX TITAN Black "}, Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Tesla K20m "}, Params{ 8, 8, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Tesla K40m "}, Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM5.0", { + { Name{"GeForce GTX 750 "}, Params{ 16, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 750 Ti "}, Params{ 16, 32, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM5.2", { + { Name{"GeForce GTX 980 "}, Params{ 8, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX TITAN X "}, Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM6.1", { + { Name{"GeForce GTX 1070 "}, Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1080 "}, Params{ 32, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"TITAN X (Pascal) "}, Params{ 8, 16, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 32, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // QUALCOMM GPUs + kDeviceTypeGPU, "QUALCOMM", { + { "default", { + { Name{"QUALCOMM Adreno(TM) "}, Params{ 32, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 16, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/copy/copy_64.hpp b/src/database/kernels/copy/copy_64.hpp new file mode 100644 index 00000000..07261310 --- /dev/null +++ b/src/database/kernels/copy/copy_64.hpp @@ -0,0 +1,131 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Copy64' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry CopyDouble = { + "Copy", Precision::kDouble, {"COPY_DIMX", "COPY_DIMY", "COPY_VW", "COPY_WPT"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { + { Name{"AMD Radeon RX 480 "}, Params{ 32, 8, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Fiji", { + { Name{"AMD Radeon R9 Fury X "}, Params{ 16, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"AMD Radeon R9 M370X Compute Engine "}, Params{ 32, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Hawaii", { + { Name{"AMD Radeon R9 290X "}, Params{ 32, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Oland", { + { Name{"Oland "}, Params{ 32, 8, 2, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 2, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Pitcairn", { + { Name{"AMD Radeon R9 270X "}, Params{ 32, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tahiti", { + { Name{"AMD Radeon HD 7970 "}, Params{ 8, 32, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 32, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tonga", { + { Name{"AMD Radeon R9 380 "}, Params{ 8, 32, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 32, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 16, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "default", { + { Name{"Mali-T628 "}, Params{ 16, 8, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 8, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "default", { + { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 16, 32, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 16, 8, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 32, 16, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz "}, Params{ 16, 32, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 16, 32, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz "}, Params{ 16, 16, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 8, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "default", { + { Name{"Intel(R) Many Integrated Core Acceleration Card "}, Params{ 8, 8, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 8, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "SM2.0", { + { Name{"GeForce GTX 480 "}, Params{ 8, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.0", { + { Name{"GRID K520 "}, Params{ 32, 16, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 670 "}, Params{ 8, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 680 "}, Params{ 16, 32, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 32, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.5", { + { Name{"GeForce GTX TITAN "}, Params{ 16, 32, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX TITAN Black "}, Params{ 16, 8, 2, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Tesla K20m "}, Params{ 8, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Tesla K40m "}, Params{ 8, 8, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 16, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM5.0", { + { Name{"GeForce GTX 750 "}, Params{ 8, 16, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 750 Ti "}, Params{ 16, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM5.2", { + { Name{"GeForce GTX 980 "}, Params{ 32, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX TITAN X "}, Params{ 32, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM6.1", { + { Name{"GeForce GTX 1070 "}, Params{ 8, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1080 "}, Params{ 8, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"TITAN X (Pascal) "}, Params{ 8, 8, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 32, 32, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 16, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/copy/copy_6464.hpp b/src/database/kernels/copy/copy_6464.hpp new file mode 100644 index 00000000..7b52eb08 --- /dev/null +++ b/src/database/kernels/copy/copy_6464.hpp @@ -0,0 +1,131 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Copy6464' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry CopyComplexDouble = { + "Copy", Precision::kComplexDouble, {"COPY_DIMX", "COPY_DIMY", "COPY_VW", "COPY_WPT"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { + { Name{"AMD Radeon RX 480 "}, Params{ 8, 32, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 32, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Fiji", { + { Name{"AMD Radeon R9 Fury X "}, Params{ 8, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"AMD Radeon R9 M370X Compute Engine "}, Params{ 8, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Hawaii", { + { Name{"AMD Radeon R9 290X "}, Params{ 32, 8, 2, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 2, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Oland", { + { Name{"Oland "}, Params{ 8, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Pitcairn", { + { Name{"AMD Radeon R9 270X "}, Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tahiti", { + { Name{"AMD Radeon HD 7970 "}, Params{ 8, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tonga", { + { Name{"AMD Radeon R9 380 "}, Params{ 16, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 8, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "default", { + { Name{"Mali-T628 "}, Params{ 32, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "default", { + { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 8, 8, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 32, 8, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 32, 16, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz "}, Params{ 32, 32, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 32, 16, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz "}, Params{ 8, 8, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 8, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "default", { + { Name{"Intel(R) Many Integrated Core Acceleration Card "}, Params{ 32, 8, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "SM2.0", { + { Name{"GeForce GTX 480 "}, Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.0", { + { Name{"GRID K520 "}, Params{ 8, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 670 "}, Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 680 "}, Params{ 8, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.5", { + { Name{"GeForce GTX TITAN "}, Params{ 16, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX TITAN Black "}, Params{ 8, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Tesla K20m "}, Params{ 8, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Tesla K40m "}, Params{ 8, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM5.0", { + { Name{"GeForce GTX 750 "}, Params{ 32, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 750 Ti "}, Params{ 16, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM5.2", { + { Name{"GeForce GTX 980 "}, Params{ 8, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX TITAN X "}, Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM6.1", { + { Name{"GeForce GTX 1070 "}, Params{ 8, 32, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1080 "}, Params{ 8, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"TITAN X (Pascal) "}, Params{ 8, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 8, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/invert.hpp b/src/database/kernels/invert.hpp index 193d1ab4..b7464382 100644 --- a/src/database/kernels/invert.hpp +++ b/src/database/kernels/invert.hpp @@ -15,11 +15,11 @@ namespace clblast { namespace database { // ================================================================================================= -const Database::DatabaseEntry InvertHalf = { +const DatabaseEntry InvertHalf = { "Invert", Precision::kHalf, {"INTERNAL_BLOCK_SIZE"}, { { // Default kDeviceTypeAll, "default", { - { "default", { 16 } }, + { "default", { { kDeviceNameDefault, Params{ 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } }, } }, } @@ -27,11 +27,11 @@ const Database::DatabaseEntry InvertHalf = { // ================================================================================================= -const Database::DatabaseEntry InvertSingle = { +const DatabaseEntry InvertSingle = { "Invert", Precision::kSingle, {"INTERNAL_BLOCK_SIZE"}, { { // Default kDeviceTypeAll, "default", { - { "default", { 16 } }, + { "default", { { kDeviceNameDefault, Params{ 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } }, } }, } @@ -39,11 +39,11 @@ const Database::DatabaseEntry InvertSingle = { // ================================================================================================= -const Database::DatabaseEntry InvertComplexSingle = { +const DatabaseEntry InvertComplexSingle = { "Invert", Precision::kComplexSingle, {"INTERNAL_BLOCK_SIZE"}, { { // Default kDeviceTypeAll, "default", { - { "default", { 16 } }, + { "default", { { kDeviceNameDefault, Params{ 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } }, } }, } @@ -51,11 +51,11 @@ const Database::DatabaseEntry InvertComplexSingle = { // ================================================================================================= -const Database::DatabaseEntry InvertDouble = { +const DatabaseEntry InvertDouble = { "Invert", Precision::kDouble, {"INTERNAL_BLOCK_SIZE"}, { { // Default kDeviceTypeAll, "default", { - { "default", { 16 } }, + { "default", { { kDeviceNameDefault, Params{ 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } }, } }, } @@ -63,11 +63,11 @@ const Database::DatabaseEntry InvertDouble = { // ================================================================================================= -const Database::DatabaseEntry InvertComplexDouble = { +const DatabaseEntry InvertComplexDouble = { "Invert", Precision::kComplexDouble, {"INTERNAL_BLOCK_SIZE"}, { { // Default kDeviceTypeAll, "default", { - { "default", { 16 } }, + { "default", { { kDeviceNameDefault, Params{ 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } }, } }, } diff --git a/src/database/kernels/pad.hpp b/src/database/kernels/pad.hpp deleted file mode 100644 index b6ebde43..00000000 --- a/src/database/kernels/pad.hpp +++ /dev/null @@ -1,362 +0,0 @@ - -// ================================================================================================= -// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This -// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- -// width of 100 characters per line. -// -// Author(s): -// Database generator <database.py> -// -// This file populates the database with best-found tuning parameters for the 'Pad' kernels. -// -// ================================================================================================= - -namespace clblast { -namespace database { -// ================================================================================================= - -const Database::DatabaseEntry PadHalf = { - "Pad", Precision::kHalf, {"PAD_DIMX", "PAD_DIMY", "PAD_WPTX", "PAD_WPTY"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "Ellesmere", { 16, 8, 1, 2 } }, - { "default", { 16, 8, 1, 2 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 8, 8, 4, 1 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 8, 32, 2, 2 } }, - { "default", { 8, 8, 2, 1 } }, - } - }, - { // QUALCOMM GPUs - kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 16, 8, 4, 2 } }, - { "default", { 16, 8, 4, 2 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 8, 8, 4, 1 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry PadSingle = { - "Pad", Precision::kSingle, {"PAD_DIMX", "PAD_DIMY", "PAD_WPTX", "PAD_WPTY"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 32, 8, 1, 1 } }, - { "ATI Radeon HD 6750M", { 8, 16, 2, 1 } }, - { "Ellesmere", { 32, 8, 2, 2 } }, - { "Fiji", { 16, 16, 1, 2 } }, - { "Hawaii", { 32, 8, 1, 4 } }, - { "Oland", { 8, 8, 1, 2 } }, - { "Pitcairn", { 32, 8, 1, 2 } }, - { "Tahiti", { 32, 8, 1, 2 } }, - { "Tonga", { 16, 16, 2, 2 } }, - { "Turks", { 32, 8, 2, 1 } }, - { "default", { 8, 16, 1, 2 } }, - } - }, - { // ARM GPUs - kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 32, 8, 1, 4 } }, - { "default", { 32, 8, 1, 4 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 32, 4, 4 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 16, 4, 1 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 8, 2, 4 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 16, 32, 4, 4 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 16, 4, 4 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 8, 4, 1 } }, - { "default", { 32, 8, 4, 2 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 530", { 32, 8, 2, 4 } }, - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 32, 8, 2, 4 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 16, 8, 1, 2 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 16, 8, 4, 1 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 32, 8, 4, 2 } }, - { "Iris", { 32, 16, 2, 1 } }, - { "Iris Pro", { 16, 8, 2, 1 } }, - { "default", { 32, 8, 4, 2 } }, - } - }, - { // Intel accelerators - kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 32, 16, 2, 1 } }, - { "default", { 32, 16, 2, 1 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 32, 8, 2, 1 } }, - { "GeForce GT 650M", { 32, 16, 2, 2 } }, - { "GeForce GTX 1070", { 16, 8, 1, 1 } }, - { "GeForce GTX 1080", { 16, 8, 1, 1 } }, - { "GeForce GTX 480", { 32, 8, 1, 4 } }, - { "GeForce GTX 670", { 32, 8, 2, 2 } }, - { "GeForce GTX 680", { 16, 8, 4, 1 } }, - { "GeForce GTX 750", { 32, 16, 4, 2 } }, - { "GeForce GTX 750 Ti", { 16, 8, 4, 1 } }, - { "GeForce GTX 980", { 16, 8, 1, 1 } }, - { "GeForce GTX TITAN", { 32, 8, 2, 1 } }, - { "GeForce GTX TITAN Black", { 32, 8, 1, 2 } }, - { "GeForce GTX TITAN X", { 16, 16, 1, 1 } }, - { "TITAN X (Pascal)", { 16, 8, 1, 2 } }, - { "Tesla K20m", { 32, 8, 2, 1 } }, - { "Tesla K40m", { 32, 8, 1, 1 } }, - { "default", { 32, 8, 4, 1 } }, - } - }, - { // QUALCOMM GPUs - kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 32, 8, 2, 1 } }, - { "default", { 32, 8, 2, 1 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 32, 8, 2, 1 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry PadComplexSingle = { - "Pad", Precision::kComplexSingle, {"PAD_DIMX", "PAD_DIMY", "PAD_WPTX", "PAD_WPTY"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 32, 8, 1, 1 } }, - { "ATI Radeon HD 6750M", { 16, 8, 2, 1 } }, - { "Ellesmere", { 16, 16, 2, 4 } }, - { "Fiji", { 16, 8, 1, 2 } }, - { "Hawaii", { 32, 8, 1, 2 } }, - { "Oland", { 8, 32, 1, 1 } }, - { "Pitcairn", { 8, 8, 1, 2 } }, - { "Tahiti", { 16, 16, 1, 1 } }, - { "Tonga", { 16, 8, 1, 2 } }, - { "Turks", { 16, 8, 4, 4 } }, - { "default", { 16, 8, 1, 2 } }, - } - }, - { // ARM GPUs - kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 32, 8, 1, 4 } }, - { "default", { 32, 8, 1, 4 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 8, 4, 2 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 8, 2, 2 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 8, 1, 2 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 32, 4, 1 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 8, 2, 4 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 16, 4, 1 } }, - { "default", { 32, 8, 4, 2 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 530", { 8, 8, 1, 2 } }, - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 8, 8, 1, 1 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 8, 8, 1, 1 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 32, 8, 1, 1 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 32, 8, 1, 1 } }, - { "Iris", { 32, 16, 2, 4 } }, - { "Iris Pro", { 32, 8, 2, 1 } }, - { "default", { 32, 8, 1, 4 } }, - } - }, - { // Intel accelerators - kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 32, 8, 1, 1 } }, - { "default", { 32, 8, 1, 1 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 16, 16, 1, 1 } }, - { "GeForce GTX 1070", { 8, 32, 1, 1 } }, - { "GeForce GTX 1080", { 32, 8, 1, 1 } }, - { "GeForce GTX 480", { 16, 8, 2, 1 } }, - { "GeForce GTX 670", { 16, 8, 1, 2 } }, - { "GeForce GTX 680", { 16, 32, 1, 2 } }, - { "GeForce GTX 750", { 32, 8, 2, 1 } }, - { "GeForce GTX 750 Ti", { 16, 8, 1, 1 } }, - { "GeForce GTX 980", { 16, 16, 1, 1 } }, - { "GeForce GTX TITAN", { 16, 8, 2, 1 } }, - { "GeForce GTX TITAN Black", { 16, 8, 1, 2 } }, - { "GeForce GTX TITAN X", { 16, 8, 1, 1 } }, - { "TITAN X (Pascal)", { 32, 32, 1, 2 } }, - { "Tesla K20m", { 32, 8, 1, 2 } }, - { "Tesla K40m", { 16, 8, 1, 1 } }, - { "default", { 32, 8, 1, 2 } }, - } - }, - { // QUALCOMM GPUs - kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 32, 8, 4, 1 } }, - { "default", { 32, 8, 4, 1 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 32, 8, 1, 1 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry PadDouble = { - "Pad", Precision::kDouble, {"PAD_DIMX", "PAD_DIMY", "PAD_WPTX", "PAD_WPTY"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 32, 8, 1, 1 } }, - { "Ellesmere", { 8, 32, 2, 1 } }, - { "Fiji", { 8, 16, 1, 2 } }, - { "Hawaii", { 32, 8, 1, 2 } }, - { "Oland", { 8, 32, 1, 1 } }, - { "Pitcairn", { 8, 8, 1, 2 } }, - { "Tahiti", { 32, 8, 1, 1 } }, - { "Tonga", { 32, 8, 4, 1 } }, - { "default", { 16, 16, 1, 1 } }, - } - }, - { // ARM GPUs - kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 32, 8, 4, 2 } }, - { "default", { 32, 8, 4, 2 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 8, 4, 2 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 8, 4, 1 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 16, 2, 2 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 32, 4, 1 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 32, 4, 1 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 8, 2, 1 } }, - { "default", { 32, 16, 4, 1 } }, - } - }, - { // Intel accelerators - kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 32, 8, 1, 1 } }, - { "default", { 32, 8, 1, 1 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 32, 8, 1, 1 } }, - { "GeForce GTX 1070", { 8, 8, 1, 1 } }, - { "GeForce GTX 1080", { 32, 32, 2, 1 } }, - { "GeForce GTX 480", { 16, 8, 1, 1 } }, - { "GeForce GTX 670", { 16, 16, 2, 1 } }, - { "GeForce GTX 680", { 32, 32, 1, 2 } }, - { "GeForce GTX 750", { 32, 16, 1, 1 } }, - { "GeForce GTX 750 Ti", { 8, 16, 1, 1 } }, - { "GeForce GTX 980", { 8, 16, 1, 1 } }, - { "GeForce GTX TITAN", { 32, 8, 1, 1 } }, - { "GeForce GTX TITAN Black", { 16, 8, 1, 1 } }, - { "GeForce GTX TITAN X", { 16, 8, 1, 1 } }, - { "TITAN X (Pascal)", { 8, 32, 4, 1 } }, - { "Tesla K20m", { 32, 8, 1, 1 } }, - { "Tesla K40m", { 16, 8, 1, 2 } }, - { "default", { 32, 8, 1, 1 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 32, 8, 1, 1 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry PadComplexDouble = { - "Pad", Precision::kComplexDouble, {"PAD_DIMX", "PAD_DIMY", "PAD_WPTX", "PAD_WPTY"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 16, 8, 1, 1 } }, - { "Ellesmere", { 8, 16, 1, 2 } }, - { "Fiji", { 32, 8, 2, 1 } }, - { "Hawaii", { 32, 8, 1, 1 } }, - { "Oland", { 8, 16, 2, 1 } }, - { "Pitcairn", { 16, 8, 1, 1 } }, - { "Tahiti", { 8, 16, 1, 1 } }, - { "Tonga", { 8, 16, 1, 1 } }, - { "default", { 8, 16, 1, 1 } }, - } - }, - { // ARM GPUs - kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 16, 8, 4, 1 } }, - { "default", { 16, 8, 4, 1 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 16, 16, 4, 1 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 8, 2, 1 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 8, 2, 2 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 16, 32, 4, 1 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 32, 2, 2 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 8, 2, 1 } }, - { "default", { 32, 8, 2, 2 } }, - } - }, - { // Intel accelerators - kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 32, 8, 4, 1 } }, - { "default", { 32, 8, 4, 1 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 8, 8, 1, 1 } }, - { "GeForce GTX 1070", { 8, 8, 2, 2 } }, - { "GeForce GTX 1080", { 8, 8, 1, 1 } }, - { "GeForce GTX 480", { 16, 8, 1, 1 } }, - { "GeForce GTX 670", { 32, 8, 1, 1 } }, - { "GeForce GTX 680", { 8, 8, 1, 1 } }, - { "GeForce GTX 750", { 8, 8, 1, 1 } }, - { "GeForce GTX 750 Ti", { 16, 32, 1, 1 } }, - { "GeForce GTX 980", { 16, 16, 1, 1 } }, - { "GeForce GTX TITAN", { 8, 32, 1, 2 } }, - { "GeForce GTX TITAN Black", { 16, 8, 1, 4 } }, - { "GeForce GTX TITAN X", { 16, 8, 1, 1 } }, - { "TITAN X (Pascal)", { 8, 16, 1, 1 } }, - { "Tesla K20m", { 8, 8, 1, 2 } }, - { "Tesla K40m", { 8, 8, 1, 1 } }, - { "default", { 16, 8, 1, 1 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 32, 8, 1, 1 } }, - } - }, - } -}; - -// ================================================================================================= -} // namespace database -} // namespace clblast diff --git a/src/database/kernels/pad/pad.hpp b/src/database/kernels/pad/pad.hpp new file mode 100644 index 00000000..bc91c09f --- /dev/null +++ b/src/database/kernels/pad/pad.hpp @@ -0,0 +1,14 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Pad' kernels. +// +// ================================================================================================= + +#include "database/kernels/pad/pad_16.hpp" +#include "database/kernels/pad/pad_32.hpp" +#include "database/kernels/pad/pad_3232.hpp" +#include "database/kernels/pad/pad_64.hpp" +#include "database/kernels/pad/pad_6464.hpp" diff --git a/src/database/kernels/pad/pad_16.hpp b/src/database/kernels/pad/pad_16.hpp new file mode 100644 index 00000000..89684314 --- /dev/null +++ b/src/database/kernels/pad/pad_16.hpp @@ -0,0 +1,54 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Pad16' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry PadHalf = { + "Pad", Precision::kHalf, {"PAD_DIMX", "PAD_DIMY", "PAD_WPTX", "PAD_WPTY"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { + { Name{"AMD Radeon RX 480 "}, Params{ 16, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 16, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 8, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 8, 32, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // QUALCOMM GPUs + kDeviceTypeGPU, "QUALCOMM", { + { "default", { + { Name{"QUALCOMM Adreno(TM) "}, Params{ 16, 8, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 8, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 8, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/pad/pad_32.hpp b/src/database/kernels/pad/pad_32.hpp new file mode 100644 index 00000000..b7ba635f --- /dev/null +++ b/src/database/kernels/pad/pad_32.hpp @@ -0,0 +1,162 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Pad32' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry PadSingle = { + "Pad", Precision::kSingle, {"PAD_DIMX", "PAD_DIMY", "PAD_WPTX", "PAD_WPTY"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { + { Name{"AMD Radeon RX 480 "}, Params{ 32, 8, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Fiji", { + { Name{"AMD Radeon R9 Fury X "}, Params{ 16, 16, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"AMD Radeon R9 M370X Compute Engine "}, Params{ 32, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Hawaii", { + { Name{"AMD Radeon R9 290X "}, Params{ 32, 8, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Oland", { + { Name{"Oland "}, Params{ 8, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Pitcairn", { + { Name{"AMD Radeon R9 270X "}, Params{ 32, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tahiti", { + { Name{"AMD Radeon HD 7970 "}, Params{ 32, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tonga", { + { Name{"AMD Radeon R9 380 "}, Params{ 16, 16, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 16, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Turks", { + { Name{"AMD Radeon HD 6770M "}, Params{ 32, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Vancouver", { + { Name{"ATI Radeon HD 6750M "}, Params{ 8, 16, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 16, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 8, 16, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "default", { + { Name{"Mali-T628 "}, Params{ 32, 8, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "default", { + { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 32, 32, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 32, 16, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 32, 8, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz "}, Params{ 16, 32, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 32, 16, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz "}, Params{ 32, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics 530 "}, Params{ 32, 8, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 32, 8, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile "}, Params{ 16, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics IvyBridge M GT2 "}, Params{ 16, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 32, 8, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Iris "}, Params{ 32, 16, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Iris Pro "}, Params{ 16, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "default", { + { Name{"Intel(R) Many Integrated Core Acceleration Card "}, Params{ 32, 16, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 16, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "SM2.0", { + { Name{"GeForce GTX 480 "}, Params{ 32, 8, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.0", { + { Name{"GRID K520 "}, Params{ 32, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GT 650M "}, Params{ 32, 16, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 670 "}, Params{ 32, 8, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 680 "}, Params{ 16, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.5", { + { Name{"GeForce GTX TITAN "}, Params{ 32, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX TITAN Black "}, Params{ 32, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Tesla K20m "}, Params{ 32, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Tesla K40m "}, Params{ 32, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM5.0", { + { Name{"GeForce GTX 750 "}, Params{ 32, 16, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 750 Ti "}, Params{ 16, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM5.2", { + { Name{"GeForce GTX 980 "}, Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX TITAN X "}, Params{ 16, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM6.1", { + { Name{"GeForce GTX 1070 "}, Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1080 "}, Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"TITAN X (Pascal) "}, Params{ 16, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 32, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 32, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // QUALCOMM GPUs + kDeviceTypeGPU, "QUALCOMM", { + { "default", { + { Name{"QUALCOMM Adreno(TM) "}, Params{ 32, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 32, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/pad/pad_3232.hpp b/src/database/kernels/pad/pad_3232.hpp new file mode 100644 index 00000000..1cf80b5f --- /dev/null +++ b/src/database/kernels/pad/pad_3232.hpp @@ -0,0 +1,161 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Pad3232' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry PadComplexSingle = { + "Pad", Precision::kComplexSingle, {"PAD_DIMX", "PAD_DIMY", "PAD_WPTX", "PAD_WPTY"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { + { Name{"AMD Radeon RX 480 "}, Params{ 16, 16, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 16, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Fiji", { + { Name{"AMD Radeon R9 Fury X "}, Params{ 16, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"AMD Radeon R9 M370X Compute Engine "}, Params{ 32, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Hawaii", { + { Name{"AMD Radeon R9 290X "}, Params{ 32, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Oland", { + { Name{"Oland "}, Params{ 8, 32, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 32, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Pitcairn", { + { Name{"AMD Radeon R9 270X "}, Params{ 8, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tahiti", { + { Name{"AMD Radeon HD 7970 "}, Params{ 16, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tonga", { + { Name{"AMD Radeon R9 380 "}, Params{ 16, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Turks", { + { Name{"AMD Radeon HD 6770M "}, Params{ 16, 8, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 8, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Vancouver", { + { Name{"ATI Radeon HD 6750M "}, Params{ 16, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 16, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "default", { + { Name{"Mali-T628 "}, Params{ 32, 8, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "default", { + { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 32, 8, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 32, 8, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 32, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz "}, Params{ 32, 32, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 32, 8, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz "}, Params{ 32, 16, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics 530 "}, Params{ 8, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 8, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile "}, Params{ 8, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics IvyBridge M GT2 "}, Params{ 32, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 32, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Iris "}, Params{ 32, 16, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Iris Pro "}, Params{ 32, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "default", { + { Name{"Intel(R) Many Integrated Core Acceleration Card "}, Params{ 32, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "SM2.0", { + { Name{"GeForce GTX 480 "}, Params{ 16, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.0", { + { Name{"GRID K520 "}, Params{ 16, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 670 "}, Params{ 16, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 680 "}, Params{ 16, 32, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.5", { + { Name{"GeForce GTX TITAN "}, Params{ 16, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX TITAN Black "}, Params{ 16, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Tesla K20m "}, Params{ 32, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Tesla K40m "}, Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM5.0", { + { Name{"GeForce GTX 750 "}, Params{ 32, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 750 Ti "}, Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM5.2", { + { Name{"GeForce GTX 980 "}, Params{ 16, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX TITAN X "}, Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM6.1", { + { Name{"GeForce GTX 1070 "}, Params{ 8, 32, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1080 "}, Params{ 32, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"TITAN X (Pascal) "}, Params{ 32, 32, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 32, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // QUALCOMM GPUs + kDeviceTypeGPU, "QUALCOMM", { + { "default", { + { Name{"QUALCOMM Adreno(TM) "}, Params{ 32, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 32, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/pad/pad_64.hpp b/src/database/kernels/pad/pad_64.hpp new file mode 100644 index 00000000..76bcdcf7 --- /dev/null +++ b/src/database/kernels/pad/pad_64.hpp @@ -0,0 +1,131 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Pad64' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry PadDouble = { + "Pad", Precision::kDouble, {"PAD_DIMX", "PAD_DIMY", "PAD_WPTX", "PAD_WPTY"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { + { Name{"AMD Radeon RX 480 "}, Params{ 8, 32, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 32, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Fiji", { + { Name{"AMD Radeon R9 Fury X "}, Params{ 8, 16, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"AMD Radeon R9 M370X Compute Engine "}, Params{ 32, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 16, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Hawaii", { + { Name{"AMD Radeon R9 290X "}, Params{ 32, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Oland", { + { Name{"Oland "}, Params{ 8, 32, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 32, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Pitcairn", { + { Name{"AMD Radeon R9 270X "}, Params{ 8, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tahiti", { + { Name{"AMD Radeon HD 7970 "}, Params{ 32, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tonga", { + { Name{"AMD Radeon R9 380 "}, Params{ 32, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 16, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "default", { + { Name{"Mali-T628 "}, Params{ 32, 8, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "default", { + { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 32, 8, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 32, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 32, 16, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz "}, Params{ 32, 32, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 32, 32, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz "}, Params{ 32, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 16, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "default", { + { Name{"Intel(R) Many Integrated Core Acceleration Card "}, Params{ 32, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "SM2.0", { + { Name{"GeForce GTX 480 "}, Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.0", { + { Name{"GRID K520 "}, Params{ 32, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 670 "}, Params{ 16, 16, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 680 "}, Params{ 32, 32, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.5", { + { Name{"GeForce GTX TITAN "}, Params{ 32, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX TITAN Black "}, Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Tesla K20m "}, Params{ 32, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Tesla K40m "}, Params{ 16, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM5.0", { + { Name{"GeForce GTX 750 "}, Params{ 32, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 750 Ti "}, Params{ 8, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM5.2", { + { Name{"GeForce GTX 980 "}, Params{ 8, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX TITAN X "}, Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM6.1", { + { Name{"GeForce GTX 1070 "}, Params{ 8, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1080 "}, Params{ 32, 32, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"TITAN X (Pascal) "}, Params{ 8, 32, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 32, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 32, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 32, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/pad/pad_6464.hpp b/src/database/kernels/pad/pad_6464.hpp new file mode 100644 index 00000000..4246495c --- /dev/null +++ b/src/database/kernels/pad/pad_6464.hpp @@ -0,0 +1,131 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Pad6464' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry PadComplexDouble = { + "Pad", Precision::kComplexDouble, {"PAD_DIMX", "PAD_DIMY", "PAD_WPTX", "PAD_WPTY"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { + { Name{"AMD Radeon RX 480 "}, Params{ 8, 16, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 16, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Fiji", { + { Name{"AMD Radeon R9 Fury X "}, Params{ 32, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"AMD Radeon R9 M370X Compute Engine "}, Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Hawaii", { + { Name{"AMD Radeon R9 290X "}, Params{ 32, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Oland", { + { Name{"Oland "}, Params{ 8, 16, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 16, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Pitcairn", { + { Name{"AMD Radeon R9 270X "}, Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tahiti", { + { Name{"AMD Radeon HD 7970 "}, Params{ 8, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tonga", { + { Name{"AMD Radeon R9 380 "}, Params{ 8, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 8, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "default", { + { Name{"Mali-T628 "}, Params{ 16, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "default", { + { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 16, 16, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 32, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 32, 8, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz "}, Params{ 16, 32, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 32, 32, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz "}, Params{ 32, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "default", { + { Name{"Intel(R) Many Integrated Core Acceleration Card "}, Params{ 32, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "SM2.0", { + { Name{"GeForce GTX 480 "}, Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.0", { + { Name{"GRID K520 "}, Params{ 8, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 670 "}, Params{ 32, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 680 "}, Params{ 8, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.5", { + { Name{"GeForce GTX TITAN "}, Params{ 8, 32, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX TITAN Black "}, Params{ 16, 8, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Tesla K20m "}, Params{ 8, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Tesla K40m "}, Params{ 8, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 32, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM5.0", { + { Name{"GeForce GTX 750 "}, Params{ 8, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 750 Ti "}, Params{ 16, 32, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 32, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM5.2", { + { Name{"GeForce GTX 980 "}, Params{ 16, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX TITAN X "}, Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM6.1", { + { Name{"GeForce GTX 1070 "}, Params{ 8, 8, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1080 "}, Params{ 8, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"TITAN X (Pascal) "}, Params{ 8, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 32, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 32, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/padtranspose.hpp b/src/database/kernels/padtranspose.hpp deleted file mode 100644 index bbda5c65..00000000 --- a/src/database/kernels/padtranspose.hpp +++ /dev/null @@ -1,361 +0,0 @@ - -// ================================================================================================= -// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This -// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- -// width of 100 characters per line. -// -// Author(s): -// Database generator <database.py> -// -// This file populates the database with best-found tuning parameters for the 'Padtranspose' kernels. -// -// ================================================================================================= - -namespace clblast { -namespace database { -// ================================================================================================= - -const Database::DatabaseEntry PadtransposeHalf = { - "Padtranspose", Precision::kHalf, {"PADTRA_PAD", "PADTRA_TILE", "PADTRA_WPT"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "Ellesmere", { 0, 16, 4 } }, - { "default", { 0, 16, 4 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 0, 8, 1 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 1, 8, 2 } }, - { "default", { 0, 8, 1 } }, - } - }, - { // QUALCOMM GPUs - kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 0, 8, 8 } }, - { "default", { 0, 8, 8 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 0, 8, 1 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry PadtransposeSingle = { - "Padtranspose", Precision::kSingle, {"PADTRA_PAD", "PADTRA_TILE", "PADTRA_WPT"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 0, 16, 4 } }, - { "ATI Radeon HD 6750M", { 1, 16, 1 } }, - { "Ellesmere", { 1, 8, 4 } }, - { "Fiji", { 0, 16, 2 } }, - { "Hawaii", { 1, 16, 4 } }, - { "Oland", { 0, 16, 4 } }, - { "Pitcairn", { 0, 16, 4 } }, - { "Tahiti", { 0, 16, 4 } }, - { "Tonga", { 0, 16, 2 } }, - { "Turks", { 1, 16, 1 } }, - { "default", { 0, 16, 4 } }, - } - }, - { // ARM GPUs - kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 0, 8, 2 } }, - { "default", { 0, 8, 2 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 0, 8, 8 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 0, 16, 1 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 0, 32, 1 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 0, 8, 8 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 0, 8, 8 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 0, 32, 1 } }, - { "default", { 0, 8, 8 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 530", { 1, 16, 2 } }, - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 0, 16, 4 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 1, 16, 2 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 0, 16, 4 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 1, 16, 2 } }, - { "Iris", { 1, 16, 2 } }, - { "Iris Pro", { 1, 16, 2 } }, - { "default", { 1, 16, 2 } }, - } - }, - { // Intel accelerators - kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 0, 16, 2 } }, - { "default", { 0, 16, 2 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 1, 32, 2 } }, - { "GeForce GTX 1070", { 0, 16, 1 } }, - { "GeForce GTX 1080", { 1, 16, 2 } }, - { "GeForce GTX 480", { 1, 16, 2 } }, - { "GeForce GTX 670", { 1, 32, 2 } }, - { "GeForce GTX 680", { 1, 16, 2 } }, - { "GeForce GTX 750", { 1, 32, 2 } }, - { "GeForce GTX 750 Ti", { 1, 32, 2 } }, - { "GeForce GTX 980", { 0, 16, 1 } }, - { "GeForce GTX TITAN", { 1, 16, 2 } }, - { "GeForce GTX TITAN Black", { 1, 32, 2 } }, - { "GeForce GTX TITAN X", { 1, 32, 1 } }, - { "TITAN X (Pascal)", { 1, 16, 2 } }, - { "Tesla K20m", { 1, 16, 2 } }, - { "Tesla K40m", { 1, 32, 2 } }, - { "default", { 1, 32, 2 } }, - } - }, - { // QUALCOMM GPUs - kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 0, 8, 2 } }, - { "default", { 0, 8, 2 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 1, 16, 2 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry PadtransposeComplexSingle = { - "Padtranspose", Precision::kComplexSingle, {"PADTRA_PAD", "PADTRA_TILE", "PADTRA_WPT"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 0, 16, 4 } }, - { "ATI Radeon HD 6750M", { 1, 16, 1 } }, - { "Ellesmere", { 0, 8, 4 } }, - { "Fiji", { 1, 16, 2 } }, - { "Hawaii", { 0, 16, 2 } }, - { "Oland", { 0, 8, 4 } }, - { "Pitcairn", { 0, 8, 4 } }, - { "Tahiti", { 0, 16, 2 } }, - { "Tonga", { 0, 16, 2 } }, - { "Turks", { 0, 16, 4 } }, - { "default", { 0, 8, 4 } }, - } - }, - { // ARM GPUs - kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 1, 16, 2 } }, - { "default", { 1, 16, 2 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 0, 8, 8 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1, 8, 4 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 0, 8, 4 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 0, 8, 8 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 0, 8, 8 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 0, 8, 4 } }, - { "default", { 0, 8, 8 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 530", { 1, 16, 2 } }, - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 0, 16, 2 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 1, 16, 2 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 0, 16, 2 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 0, 16, 4 } }, - { "Iris", { 0, 16, 2 } }, - { "Iris Pro", { 1, 16, 2 } }, - { "default", { 1, 16, 2 } }, - } - }, - { // Intel accelerators - kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 1, 16, 1 } }, - { "default", { 1, 16, 1 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 1, 16, 1 } }, - { "GeForce GTX 1070", { 1, 16, 1 } }, - { "GeForce GTX 1080", { 0, 8, 1 } }, - { "GeForce GTX 480", { 1, 16, 1 } }, - { "GeForce GTX 670", { 1, 16, 1 } }, - { "GeForce GTX 680", { 1, 16, 1 } }, - { "GeForce GTX 750", { 1, 16, 2 } }, - { "GeForce GTX 750 Ti", { 1, 16, 1 } }, - { "GeForce GTX 980", { 0, 16, 1 } }, - { "GeForce GTX TITAN", { 1, 16, 1 } }, - { "GeForce GTX TITAN Black", { 0, 16, 1 } }, - { "GeForce GTX TITAN X", { 1, 32, 1 } }, - { "TITAN X (Pascal)", { 1, 8, 1 } }, - { "Tesla K20m", { 0, 16, 1 } }, - { "Tesla K40m", { 1, 16, 1 } }, - { "default", { 1, 16, 1 } }, - } - }, - { // QUALCOMM GPUs - kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 0, 8, 4 } }, - { "default", { 0, 8, 4 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 1, 8, 2 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry PadtransposeDouble = { - "Padtranspose", Precision::kDouble, {"PADTRA_PAD", "PADTRA_TILE", "PADTRA_WPT"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 0, 16, 4 } }, - { "Ellesmere", { 0, 16, 4 } }, - { "Fiji", { 0, 16, 2 } }, - { "Hawaii", { 0, 16, 2 } }, - { "Oland", { 0, 16, 4 } }, - { "Pitcairn", { 0, 8, 4 } }, - { "Tahiti", { 1, 16, 2 } }, - { "Tonga", { 0, 8, 2 } }, - { "default", { 0, 16, 4 } }, - } - }, - { // ARM GPUs - kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 0, 16, 2 } }, - { "default", { 0, 16, 2 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 0, 8, 8 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1, 8, 4 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 0, 64, 1 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 0, 8, 8 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 0, 8, 8 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 1, 32, 1 } }, - { "default", { 1, 8, 4 } }, - } - }, - { // Intel accelerators - kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 0, 16, 1 } }, - { "default", { 0, 16, 1 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 1, 16, 1 } }, - { "GeForce GTX 1070", { 1, 16, 1 } }, - { "GeForce GTX 1080", { 0, 8, 1 } }, - { "GeForce GTX 480", { 1, 16, 1 } }, - { "GeForce GTX 670", { 1, 16, 1 } }, - { "GeForce GTX 680", { 1, 16, 1 } }, - { "GeForce GTX 750", { 1, 16, 2 } }, - { "GeForce GTX 750 Ti", { 1, 32, 2 } }, - { "GeForce GTX 980", { 1, 32, 1 } }, - { "GeForce GTX TITAN", { 0, 16, 1 } }, - { "GeForce GTX TITAN Black", { 0, 16, 1 } }, - { "GeForce GTX TITAN X", { 1, 32, 1 } }, - { "TITAN X (Pascal)", { 0, 8, 1 } }, - { "Tesla K20m", { 0, 16, 1 } }, - { "Tesla K40m", { 1, 16, 1 } }, - { "default", { 1, 16, 1 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 1, 16, 2 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry PadtransposeComplexDouble = { - "Padtranspose", Precision::kComplexDouble, {"PADTRA_PAD", "PADTRA_TILE", "PADTRA_WPT"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 0, 8, 4 } }, - { "Ellesmere", { 0, 8, 4 } }, - { "Fiji", { 0, 8, 2 } }, - { "Hawaii", { 0, 8, 4 } }, - { "Oland", { 0, 8, 4 } }, - { "Pitcairn", { 0, 8, 4 } }, - { "Tahiti", { 0, 8, 2 } }, - { "Tonga", { 0, 8, 2 } }, - { "default", { 0, 8, 4 } }, - } - }, - { // ARM GPUs - kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 0, 8, 1 } }, - { "default", { 0, 8, 1 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 0, 8, 4 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1, 8, 2 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 1, 16, 2 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 1, 8, 4 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 0, 8, 4 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 1, 8, 4 } }, - { "default", { 0, 8, 4 } }, - } - }, - { // Intel accelerators - kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 0, 16, 1 } }, - { "default", { 0, 16, 1 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 1, 16, 1 } }, - { "GeForce GTX 1070", { 1, 16, 1 } }, - { "GeForce GTX 1080", { 1, 8, 1 } }, - { "GeForce GTX 480", { 1, 16, 1 } }, - { "GeForce GTX 670", { 1, 16, 1 } }, - { "GeForce GTX 680", { 1, 32, 1 } }, - { "GeForce GTX 750", { 1, 16, 1 } }, - { "GeForce GTX 750 Ti", { 1, 8, 2 } }, - { "GeForce GTX 980", { 0, 16, 1 } }, - { "GeForce GTX TITAN", { 1, 16, 1 } }, - { "GeForce GTX TITAN Black", { 0, 16, 1 } }, - { "GeForce GTX TITAN X", { 1, 32, 1 } }, - { "TITAN X (Pascal)", { 1, 8, 1 } }, - { "Tesla K20m", { 1, 16, 1 } }, - { "Tesla K40m", { 1, 16, 1 } }, - { "default", { 1, 16, 1 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 0, 8, 2 } }, - } - }, - } -}; - -// ================================================================================================= -} // namespace database -} // namespace clblast diff --git a/src/database/kernels/padtranspose/padtranspose.hpp b/src/database/kernels/padtranspose/padtranspose.hpp new file mode 100644 index 00000000..c395653a --- /dev/null +++ b/src/database/kernels/padtranspose/padtranspose.hpp @@ -0,0 +1,14 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Padtranspose' kernels. +// +// ================================================================================================= + +#include "database/kernels/padtranspose/padtranspose_16.hpp" +#include "database/kernels/padtranspose/padtranspose_32.hpp" +#include "database/kernels/padtranspose/padtranspose_3232.hpp" +#include "database/kernels/padtranspose/padtranspose_64.hpp" +#include "database/kernels/padtranspose/padtranspose_6464.hpp" diff --git a/src/database/kernels/padtranspose/padtranspose_16.hpp b/src/database/kernels/padtranspose/padtranspose_16.hpp new file mode 100644 index 00000000..ea09a062 --- /dev/null +++ b/src/database/kernels/padtranspose/padtranspose_16.hpp @@ -0,0 +1,54 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Padtranspose16' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry PadtransposeHalf = { + "Padtranspose", Precision::kHalf, {"PADTRA_PAD", "PADTRA_TILE", "PADTRA_WPT"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { + { Name{"AMD Radeon RX 480 "}, Params{ 0, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 0, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 0, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 0, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 1, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 0, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // QUALCOMM GPUs + kDeviceTypeGPU, "QUALCOMM", { + { "default", { + { Name{"QUALCOMM Adreno(TM) "}, Params{ 0, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 0, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 0, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/padtranspose/padtranspose_32.hpp b/src/database/kernels/padtranspose/padtranspose_32.hpp new file mode 100644 index 00000000..6e607768 --- /dev/null +++ b/src/database/kernels/padtranspose/padtranspose_32.hpp @@ -0,0 +1,161 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Padtranspose32' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry PadtransposeSingle = { + "Padtranspose", Precision::kSingle, {"PADTRA_PAD", "PADTRA_TILE", "PADTRA_WPT"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { + { Name{"AMD Radeon RX 480 "}, Params{ 1, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Fiji", { + { Name{"AMD Radeon R9 Fury X "}, Params{ 0, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"AMD Radeon R9 M370X Compute Engine "}, Params{ 0, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 0, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Hawaii", { + { Name{"AMD Radeon R9 290X "}, Params{ 1, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Oland", { + { Name{"Oland "}, Params{ 0, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 0, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Pitcairn", { + { Name{"AMD Radeon R9 270X "}, Params{ 0, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 0, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tahiti", { + { Name{"AMD Radeon HD 7970 "}, Params{ 0, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 0, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tonga", { + { Name{"AMD Radeon R9 380 "}, Params{ 0, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 0, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Turks", { + { Name{"AMD Radeon HD 6770M "}, Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Vancouver", { + { Name{"ATI Radeon HD 6750M "}, Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 0, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "default", { + { Name{"Mali-T628 "}, Params{ 0, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 0, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "default", { + { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 0, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 0, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 0, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz "}, Params{ 0, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 0, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz "}, Params{ 0, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 0, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics 530 "}, Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 0, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile "}, Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics IvyBridge M GT2 "}, Params{ 0, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Iris "}, Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Iris Pro "}, Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "default", { + { Name{"Intel(R) Many Integrated Core Acceleration Card "}, Params{ 0, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 0, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "SM2.0", { + { Name{"GeForce GTX 480 "}, Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.0", { + { Name{"GRID K520 "}, Params{ 1, 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 670 "}, Params{ 1, 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 680 "}, Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.5", { + { Name{"GeForce GTX TITAN "}, Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX TITAN Black "}, Params{ 1, 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Tesla K20m "}, Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Tesla K40m "}, Params{ 1, 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM5.0", { + { Name{"GeForce GTX 750 "}, Params{ 1, 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 750 Ti "}, Params{ 1, 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM5.2", { + { Name{"GeForce GTX 980 "}, Params{ 0, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX TITAN X "}, Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM6.1", { + { Name{"GeForce GTX 1070 "}, Params{ 0, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1080 "}, Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"TITAN X (Pascal) "}, Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 1, 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // QUALCOMM GPUs + kDeviceTypeGPU, "QUALCOMM", { + { "default", { + { Name{"QUALCOMM Adreno(TM) "}, Params{ 0, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 0, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/padtranspose/padtranspose_3232.hpp b/src/database/kernels/padtranspose/padtranspose_3232.hpp new file mode 100644 index 00000000..b959dfaf --- /dev/null +++ b/src/database/kernels/padtranspose/padtranspose_3232.hpp @@ -0,0 +1,161 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Padtranspose3232' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry PadtransposeComplexSingle = { + "Padtranspose", Precision::kComplexSingle, {"PADTRA_PAD", "PADTRA_TILE", "PADTRA_WPT"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { + { Name{"AMD Radeon RX 480 "}, Params{ 0, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 0, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Fiji", { + { Name{"AMD Radeon R9 Fury X "}, Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"AMD Radeon R9 M370X Compute Engine "}, Params{ 0, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 0, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Hawaii", { + { Name{"AMD Radeon R9 290X "}, Params{ 0, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 0, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Oland", { + { Name{"Oland "}, Params{ 0, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 0, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Pitcairn", { + { Name{"AMD Radeon R9 270X "}, Params{ 0, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 0, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tahiti", { + { Name{"AMD Radeon HD 7970 "}, Params{ 0, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 0, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tonga", { + { Name{"AMD Radeon R9 380 "}, Params{ 0, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 0, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Turks", { + { Name{"AMD Radeon HD 6770M "}, Params{ 0, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 0, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Vancouver", { + { Name{"ATI Radeon HD 6750M "}, Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 0, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "default", { + { Name{"Mali-T628 "}, Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "default", { + { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 0, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 1, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 0, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz "}, Params{ 0, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 0, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz "}, Params{ 0, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 0, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics 530 "}, Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 0, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile "}, Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics IvyBridge M GT2 "}, Params{ 0, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 0, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Iris "}, Params{ 0, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Iris Pro "}, Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "default", { + { Name{"Intel(R) Many Integrated Core Acceleration Card "}, Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "SM2.0", { + { Name{"GeForce GTX 480 "}, Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.0", { + { Name{"GRID K520 "}, Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 670 "}, Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 680 "}, Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.5", { + { Name{"GeForce GTX TITAN "}, Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX TITAN Black "}, Params{ 0, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Tesla K20m "}, Params{ 0, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Tesla K40m "}, Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 0, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM5.0", { + { Name{"GeForce GTX 750 "}, Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 750 Ti "}, Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM5.2", { + { Name{"GeForce GTX 980 "}, Params{ 0, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX TITAN X "}, Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM6.1", { + { Name{"GeForce GTX 1070 "}, Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1080 "}, Params{ 0, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"TITAN X (Pascal) "}, Params{ 1, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // QUALCOMM GPUs + kDeviceTypeGPU, "QUALCOMM", { + { "default", { + { Name{"QUALCOMM Adreno(TM) "}, Params{ 0, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 0, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 1, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/padtranspose/padtranspose_64.hpp b/src/database/kernels/padtranspose/padtranspose_64.hpp new file mode 100644 index 00000000..3539d474 --- /dev/null +++ b/src/database/kernels/padtranspose/padtranspose_64.hpp @@ -0,0 +1,131 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Padtranspose64' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry PadtransposeDouble = { + "Padtranspose", Precision::kDouble, {"PADTRA_PAD", "PADTRA_TILE", "PADTRA_WPT"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { + { Name{"AMD Radeon RX 480 "}, Params{ 0, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 0, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Fiji", { + { Name{"AMD Radeon R9 Fury X "}, Params{ 0, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"AMD Radeon R9 M370X Compute Engine "}, Params{ 0, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 0, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Hawaii", { + { Name{"AMD Radeon R9 290X "}, Params{ 0, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 0, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Oland", { + { Name{"Oland "}, Params{ 0, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 0, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Pitcairn", { + { Name{"AMD Radeon R9 270X "}, Params{ 0, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 0, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tahiti", { + { Name{"AMD Radeon HD 7970 "}, Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tonga", { + { Name{"AMD Radeon R9 380 "}, Params{ 0, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 0, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 0, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "default", { + { Name{"Mali-T628 "}, Params{ 0, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 0, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "default", { + { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 0, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 1, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 0, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz "}, Params{ 0, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 0, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz "}, Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "default", { + { Name{"Intel(R) Many Integrated Core Acceleration Card "}, Params{ 0, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 0, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "SM2.0", { + { Name{"GeForce GTX 480 "}, Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.0", { + { Name{"GRID K520 "}, Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 670 "}, Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 680 "}, Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.5", { + { Name{"GeForce GTX TITAN "}, Params{ 0, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX TITAN Black "}, Params{ 0, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Tesla K20m "}, Params{ 0, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Tesla K40m "}, Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 0, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM5.0", { + { Name{"GeForce GTX 750 "}, Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 750 Ti "}, Params{ 1, 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM5.2", { + { Name{"GeForce GTX 980 "}, Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX TITAN X "}, Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM6.1", { + { Name{"GeForce GTX 1070 "}, Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1080 "}, Params{ 0, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"TITAN X (Pascal) "}, Params{ 0, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/padtranspose/padtranspose_6464.hpp b/src/database/kernels/padtranspose/padtranspose_6464.hpp new file mode 100644 index 00000000..7fe6dca3 --- /dev/null +++ b/src/database/kernels/padtranspose/padtranspose_6464.hpp @@ -0,0 +1,131 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Padtranspose6464' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry PadtransposeComplexDouble = { + "Padtranspose", Precision::kComplexDouble, {"PADTRA_PAD", "PADTRA_TILE", "PADTRA_WPT"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { + { Name{"AMD Radeon RX 480 "}, Params{ 0, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 0, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Fiji", { + { Name{"AMD Radeon R9 Fury X "}, Params{ 0, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"AMD Radeon R9 M370X Compute Engine "}, Params{ 0, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 0, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Hawaii", { + { Name{"AMD Radeon R9 290X "}, Params{ 0, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 0, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Oland", { + { Name{"Oland "}, Params{ 0, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 0, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Pitcairn", { + { Name{"AMD Radeon R9 270X "}, Params{ 0, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 0, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tahiti", { + { Name{"AMD Radeon HD 7970 "}, Params{ 0, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 0, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tonga", { + { Name{"AMD Radeon R9 380 "}, Params{ 0, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 0, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 0, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "default", { + { Name{"Mali-T628 "}, Params{ 0, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 0, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "default", { + { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 0, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 1, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz "}, Params{ 1, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 0, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz "}, Params{ 1, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 0, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "default", { + { Name{"Intel(R) Many Integrated Core Acceleration Card "}, Params{ 0, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 0, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "SM2.0", { + { Name{"GeForce GTX 480 "}, Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.0", { + { Name{"GRID K520 "}, Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 670 "}, Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 680 "}, Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.5", { + { Name{"GeForce GTX TITAN "}, Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX TITAN Black "}, Params{ 0, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Tesla K20m "}, Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Tesla K40m "}, Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM5.0", { + { Name{"GeForce GTX 750 "}, Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 750 Ti "}, Params{ 1, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM5.2", { + { Name{"GeForce GTX 980 "}, Params{ 0, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX TITAN X "}, Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 0, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM6.1", { + { Name{"GeForce GTX 1070 "}, Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1080 "}, Params{ 1, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"TITAN X (Pascal) "}, Params{ 1, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 0, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/transpose.hpp b/src/database/kernels/transpose.hpp deleted file mode 100644 index b00a23dc..00000000 --- a/src/database/kernels/transpose.hpp +++ /dev/null @@ -1,350 +0,0 @@ - -// ================================================================================================= -// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This -// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- -// width of 100 characters per line. -// -// Author(s): -// Database generator <database.py> -// -// This file populates the database with best-found tuning parameters for the 'Transpose' kernels. -// -// ================================================================================================= - -namespace clblast { -namespace database { -// ================================================================================================= - -const Database::DatabaseEntry TransposeHalf = { - "Transpose", Precision::kHalf, {"TRA_DIM", "TRA_PAD", "TRA_SHUFFLE", "TRA_WPT"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "Ellesmere", { 4, 0, 1, 8 } }, - { "default", { 4, 0, 1, 8 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 8, 1, 1, 8 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 16, 0, 0, 4 } }, - { "default", { 8, 1, 0, 8 } }, - } - }, - { // QUALCOMM GPUs - kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 8, 0, 0, 4 } }, - { "default", { 8, 0, 0, 4 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 8, 0, 1, 8 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry TransposeSingle = { - "Transpose", Precision::kSingle, {"TRA_DIM", "TRA_PAD", "TRA_SHUFFLE", "TRA_WPT"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 4, 0, 1, 8 } }, - { "ATI Radeon HD 6750M", { 8, 0, 1, 2 } }, - { "Ellesmere", { 16, 0, 1, 4 } }, - { "Fiji", { 16, 0, 1, 2 } }, - { "Hawaii", { 4, 0, 1, 8 } }, - { "Oland", { 8, 0, 1, 4 } }, - { "Pitcairn", { 16, 0, 1, 1 } }, - { "Tahiti", { 4, 0, 1, 4 } }, - { "Tonga", { 8, 1, 1, 2 } }, - { "Turks", { 8, 0, 1, 2 } }, - { "default", { 8, 0, 1, 2 } }, - } - }, - { // ARM GPUs - kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 8, 0, 1, 4 } }, - { "default", { 8, 0, 1, 4 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 1, 0, 16 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 4, 0, 0, 8 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 4, 0, 1, 8 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 4, 0, 0, 8 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 1, 0, 16 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 4, 0, 0, 8 } }, - { "default", { 4, 0, 0, 8 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 530", { 16, 0, 1, 4 } }, - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 16, 0, 0, 4 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 16, 0, 0, 4 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 8, 0, 1, 4 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 16, 0, 1, 2 } }, - { "Iris", { 8, 1, 0, 4 } }, - { "Iris Pro", { 16, 1, 0, 4 } }, - { "default", { 16, 0, 0, 4 } }, - } - }, - { // Intel accelerators - kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 16, 1, 1, 1 } }, - { "default", { 16, 1, 1, 1 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 32, 1, 1, 2 } }, - { "GeForce GT 650M", { 8, 1, 0, 4 } }, - { "GeForce GTX 1070", { 8, 0, 1, 4 } }, - { "GeForce GTX 1080", { 4, 0, 0, 4 } }, - { "GeForce GTX 480", { 16, 1, 0, 2 } }, - { "GeForce GTX 670", { 16, 1, 1, 2 } }, - { "GeForce GTX 680", { 16, 1, 1, 2 } }, - { "GeForce GTX 750", { 4, 0, 0, 8 } }, - { "GeForce GTX 750 Ti", { 32, 1, 0, 2 } }, - { "GeForce GTX 980", { 16, 0, 0, 1 } }, - { "GeForce GTX TITAN", { 8, 1, 0, 4 } }, - { "GeForce GTX TITAN Black", { 8, 1, 0, 4 } }, - { "GeForce GTX TITAN X", { 16, 0, 0, 4 } }, - { "TITAN X (Pascal)", { 8, 0, 0, 4 } }, - { "Tesla K20m", { 8, 0, 0, 4 } }, - { "Tesla K40m", { 8, 1, 0, 4 } }, - { "default", { 8, 1, 0, 4 } }, - } - }, - { // QUALCOMM GPUs - kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 8, 1, 1, 4 } }, - { "default", { 8, 1, 1, 4 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 8, 0, 1, 4 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry TransposeComplexSingle = { - "Transpose", Precision::kComplexSingle, {"TRA_DIM", "TRA_PAD", "TRA_SHUFFLE", "TRA_WPT"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 4, 1, 1, 4 } }, - { "ATI Radeon HD 6750M", { 16, 1, 1, 1 } }, - { "Ellesmere", { 4, 0, 1, 4 } }, - { "Fiji", { 8, 1, 1, 2 } }, - { "Hawaii", { 16, 0, 1, 1 } }, - { "Oland", { 4, 0, 1, 2 } }, - { "Pitcairn", { 8, 0, 1, 1 } }, - { "Tahiti", { 16, 0, 1, 1 } }, - { "Tonga", { 16, 0, 1, 1 } }, - { "Turks", { 8, 1, 1, 4 } }, - { "default", { 8, 0, 1, 1 } }, - } - }, - { // ARM GPUs - kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 16, 0, 0, 2 } }, - { "default", { 16, 0, 0, 2 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 0, 1, 8 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 8, 0, 0, 2 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 4, 0, 0, 4 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 4, 1, 0, 8 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 1, 0, 8 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 16, 0, 0, 4 } }, - { "default", { 4, 1, 0, 8 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 530", { 16, 1, 1, 2 } }, - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 8, 0, 0, 2 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 8, 0, 0, 2 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 16, 1, 1, 2 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 16, 1, 0, 2 } }, - { "Iris", { 8, 0, 0, 2 } }, - { "Iris Pro", { 16, 1, 0, 2 } }, - { "default", { 16, 1, 0, 2 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 16, 1, 1, 1 } }, - { "GeForce GTX 1070", { 16, 1, 1, 1 } }, - { "GeForce GTX 1080", { 16, 1, 0, 1 } }, - { "GeForce GTX 480", { 16, 1, 0, 1 } }, - { "GeForce GTX 670", { 16, 1, 1, 1 } }, - { "GeForce GTX 680", { 16, 1, 1, 1 } }, - { "GeForce GTX 750", { 16, 1, 0, 1 } }, - { "GeForce GTX 750 Ti", { 16, 1, 0, 1 } }, - { "GeForce GTX 980", { 16, 1, 0, 1 } }, - { "GeForce GTX TITAN", { 16, 0, 0, 1 } }, - { "GeForce GTX TITAN Black", { 16, 1, 0, 1 } }, - { "GeForce GTX TITAN X", { 32, 1, 0, 1 } }, - { "TITAN X (Pascal)", { 8, 1, 0, 2 } }, - { "Tesla K20m", { 16, 0, 0, 1 } }, - { "Tesla K40m", { 16, 1, 0, 1 } }, - { "default", { 16, 1, 0, 1 } }, - } - }, - { // QUALCOMM GPUs - kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 16, 1, 0, 1 } }, - { "default", { 16, 1, 0, 1 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 8, 1, 1, 2 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry TransposeDouble = { - "Transpose", Precision::kDouble, {"TRA_DIM", "TRA_PAD", "TRA_SHUFFLE", "TRA_WPT"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 4, 0, 1, 4 } }, - { "Ellesmere", { 4, 0, 1, 4 } }, - { "Fiji", { 8, 1, 1, 2 } }, - { "Hawaii", { 16, 0, 1, 1 } }, - { "Oland", { 8, 1, 1, 2 } }, - { "Pitcairn", { 4, 0, 1, 2 } }, - { "Tahiti", { 4, 1, 1, 4 } }, - { "Tonga", { 4, 0, 1, 4 } }, - { "default", { 4, 0, 1, 4 } }, - } - }, - { // ARM GPUs - kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 8, 0, 0, 1 } }, - { "default", { 8, 0, 0, 1 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 1, 0, 8 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 4, 0, 0, 4 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 4, 1, 0, 4 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 4, 1, 0, 8 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 0, 0, 16 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 4, 0, 0, 8 } }, - { "default", { 4, 1, 0, 8 } }, - } - }, - { // Intel accelerators - kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 32, 1, 0, 1 } }, - { "default", { 32, 1, 0, 1 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 16, 1, 1, 2 } }, - { "GeForce GTX 1070", { 8, 0, 1, 2 } }, - { "GeForce GTX 1080", { 8, 0, 0, 2 } }, - { "GeForce GTX 480", { 8, 1, 0, 2 } }, - { "GeForce GTX 670", { 16, 1, 1, 2 } }, - { "GeForce GTX 680", { 16, 1, 1, 2 } }, - { "GeForce GTX 750", { 16, 1, 0, 1 } }, - { "GeForce GTX 750 Ti", { 32, 1, 0, 2 } }, - { "GeForce GTX 980", { 16, 0, 0, 2 } }, - { "GeForce GTX TITAN", { 8, 0, 0, 2 } }, - { "GeForce GTX TITAN Black", { 16, 1, 0, 2 } }, - { "GeForce GTX TITAN X", { 32, 1, 0, 1 } }, - { "TITAN X (Pascal)", { 16, 1, 0, 2 } }, - { "Tesla K20m", { 16, 1, 0, 2 } }, - { "Tesla K40m", { 16, 1, 1, 2 } }, - { "default", { 16, 1, 1, 2 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 16, 1, 1, 2 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry TransposeComplexDouble = { - "Transpose", Precision::kComplexDouble, {"TRA_DIM", "TRA_PAD", "TRA_SHUFFLE", "TRA_WPT"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 4, 1, 1, 2 } }, - { "Ellesmere", { 16, 0, 1, 1 } }, - { "Fiji", { 16, 0, 1, 1 } }, - { "Hawaii", { 4, 0, 1, 2 } }, - { "Oland", { 16, 0, 1, 1 } }, - { "Pitcairn", { 4, 0, 1, 1 } }, - { "Tahiti", { 16, 0, 1, 1 } }, - { "Tonga", { 8, 1, 1, 2 } }, - { "default", { 16, 0, 1, 1 } }, - } - }, - { // ARM GPUs - kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 8, 0, 0, 1 } }, - { "default", { 8, 0, 0, 1 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 0, 1, 8 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 4, 0, 0, 4 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 4, 0, 0, 4 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 4, 1, 0, 4 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 0, 1, 8 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 16, 1, 0, 2 } }, - { "default", { 4, 0, 0, 8 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 16, 1, 1, 1 } }, - { "GeForce GTX 1070", { 8, 0, 0, 1 } }, - { "GeForce GTX 1080", { 8, 0, 0, 1 } }, - { "GeForce GTX 480", { 8, 1, 0, 1 } }, - { "GeForce GTX 670", { 16, 1, 1, 1 } }, - { "GeForce GTX 680", { 16, 1, 1, 1 } }, - { "GeForce GTX 750", { 16, 1, 0, 1 } }, - { "GeForce GTX 750 Ti", { 16, 1, 0, 1 } }, - { "GeForce GTX 980", { 32, 1, 0, 1 } }, - { "GeForce GTX TITAN", { 16, 1, 0, 1 } }, - { "GeForce GTX TITAN Black", { 16, 0, 0, 1 } }, - { "GeForce GTX TITAN X", { 32, 1, 0, 1 } }, - { "TITAN X (Pascal)", { 8, 0, 0, 1 } }, - { "Tesla K20m", { 16, 1, 0, 1 } }, - { "Tesla K40m", { 16, 1, 0, 1 } }, - { "default", { 16, 1, 0, 1 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 16, 1, 1, 1 } }, - } - }, - } -}; - -// ================================================================================================= -} // namespace database -} // namespace clblast diff --git a/src/database/kernels/transpose/transpose.hpp b/src/database/kernels/transpose/transpose.hpp new file mode 100644 index 00000000..fa262c50 --- /dev/null +++ b/src/database/kernels/transpose/transpose.hpp @@ -0,0 +1,14 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Transpose' kernels. +// +// ================================================================================================= + +#include "database/kernels/transpose/transpose_16.hpp" +#include "database/kernels/transpose/transpose_32.hpp" +#include "database/kernels/transpose/transpose_3232.hpp" +#include "database/kernels/transpose/transpose_64.hpp" +#include "database/kernels/transpose/transpose_6464.hpp" diff --git a/src/database/kernels/transpose/transpose_16.hpp b/src/database/kernels/transpose/transpose_16.hpp new file mode 100644 index 00000000..e63102d1 --- /dev/null +++ b/src/database/kernels/transpose/transpose_16.hpp @@ -0,0 +1,54 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Transpose16' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry TransposeHalf = { + "Transpose", Precision::kHalf, {"TRA_DIM", "TRA_PAD", "TRA_SHUFFLE", "TRA_WPT"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { + { Name{"AMD Radeon RX 480 "}, Params{ 4, 0, 1, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 4, 0, 1, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 4, 0, 1, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 8, 1, 1, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 16, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 1, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // QUALCOMM GPUs + kDeviceTypeGPU, "QUALCOMM", { + { "default", { + { Name{"QUALCOMM Adreno(TM) "}, Params{ 8, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 8, 0, 1, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/transpose/transpose_32.hpp b/src/database/kernels/transpose/transpose_32.hpp new file mode 100644 index 00000000..dcc549e2 --- /dev/null +++ b/src/database/kernels/transpose/transpose_32.hpp @@ -0,0 +1,162 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Transpose32' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry TransposeSingle = { + "Transpose", Precision::kSingle, {"TRA_DIM", "TRA_PAD", "TRA_SHUFFLE", "TRA_WPT"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { + { Name{"AMD Radeon RX 480 "}, Params{ 16, 0, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 0, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Fiji", { + { Name{"AMD Radeon R9 Fury X "}, Params{ 16, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"AMD Radeon R9 M370X Compute Engine "}, Params{ 4, 0, 1, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Hawaii", { + { Name{"AMD Radeon R9 290X "}, Params{ 4, 0, 1, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 4, 0, 1, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Oland", { + { Name{"Oland "}, Params{ 8, 0, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 0, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Pitcairn", { + { Name{"AMD Radeon R9 270X "}, Params{ 16, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tahiti", { + { Name{"AMD Radeon HD 7970 "}, Params{ 4, 0, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 4, 0, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tonga", { + { Name{"AMD Radeon R9 380 "}, Params{ 8, 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Turks", { + { Name{"AMD Radeon HD 6770M "}, Params{ 8, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Vancouver", { + { Name{"ATI Radeon HD 6750M "}, Params{ 8, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 8, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "default", { + { Name{"Mali-T628 "}, Params{ 8, 0, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 0, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "default", { + { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 4, 1, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 4, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 4, 0, 1, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz "}, Params{ 4, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 4, 1, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz "}, Params{ 4, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 4, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics 530 "}, Params{ 16, 0, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 16, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile "}, Params{ 16, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics IvyBridge M GT2 "}, Params{ 8, 0, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 16, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Iris "}, Params{ 8, 1, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Iris Pro "}, Params{ 16, 1, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "default", { + { Name{"Intel(R) Many Integrated Core Acceleration Card "}, Params{ 16, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "SM2.0", { + { Name{"GeForce GTX 480 "}, Params{ 16, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.0", { + { Name{"GRID K520 "}, Params{ 32, 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GT 650M "}, Params{ 8, 1, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 670 "}, Params{ 16, 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 680 "}, Params{ 16, 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.5", { + { Name{"GeForce GTX TITAN "}, Params{ 8, 1, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX TITAN Black "}, Params{ 8, 1, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Tesla K20m "}, Params{ 8, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Tesla K40m "}, Params{ 8, 1, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 1, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM5.0", { + { Name{"GeForce GTX 750 "}, Params{ 4, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 750 Ti "}, Params{ 32, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM5.2", { + { Name{"GeForce GTX 980 "}, Params{ 16, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX TITAN X "}, Params{ 16, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM6.1", { + { Name{"GeForce GTX 1070 "}, Params{ 8, 0, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1080 "}, Params{ 4, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"TITAN X (Pascal) "}, Params{ 8, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 0, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 8, 1, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // QUALCOMM GPUs + kDeviceTypeGPU, "QUALCOMM", { + { "default", { + { Name{"QUALCOMM Adreno(TM) "}, Params{ 8, 1, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 1, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 8, 0, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/transpose/transpose_3232.hpp b/src/database/kernels/transpose/transpose_3232.hpp new file mode 100644 index 00000000..24e02f29 --- /dev/null +++ b/src/database/kernels/transpose/transpose_3232.hpp @@ -0,0 +1,153 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Transpose3232' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry TransposeComplexSingle = { + "Transpose", Precision::kComplexSingle, {"TRA_DIM", "TRA_PAD", "TRA_SHUFFLE", "TRA_WPT"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { + { Name{"AMD Radeon RX 480 "}, Params{ 4, 0, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 4, 0, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Fiji", { + { Name{"AMD Radeon R9 Fury X "}, Params{ 8, 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"AMD Radeon R9 M370X Compute Engine "}, Params{ 4, 1, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Hawaii", { + { Name{"AMD Radeon R9 290X "}, Params{ 16, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Oland", { + { Name{"Oland "}, Params{ 4, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 4, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Pitcairn", { + { Name{"AMD Radeon R9 270X "}, Params{ 8, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tahiti", { + { Name{"AMD Radeon HD 7970 "}, Params{ 16, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tonga", { + { Name{"AMD Radeon R9 380 "}, Params{ 16, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Turks", { + { Name{"AMD Radeon HD 6770M "}, Params{ 8, 1, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 1, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Vancouver", { + { Name{"ATI Radeon HD 6750M "}, Params{ 16, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 8, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "default", { + { Name{"Mali-T628 "}, Params{ 16, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "default", { + { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 4, 0, 1, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 8, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 4, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz "}, Params{ 4, 1, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 4, 1, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz "}, Params{ 16, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 4, 1, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics 530 "}, Params{ 16, 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 8, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile "}, Params{ 8, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics IvyBridge M GT2 "}, Params{ 16, 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 16, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Iris "}, Params{ 8, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Iris Pro "}, Params{ 16, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "SM2.0", { + { Name{"GeForce GTX 480 "}, Params{ 16, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.0", { + { Name{"GRID K520 "}, Params{ 16, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 670 "}, Params{ 16, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 680 "}, Params{ 16, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.5", { + { Name{"GeForce GTX TITAN "}, Params{ 16, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX TITAN Black "}, Params{ 16, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Tesla K20m "}, Params{ 16, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Tesla K40m "}, Params{ 16, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM5.0", { + { Name{"GeForce GTX 750 "}, Params{ 16, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 750 Ti "}, Params{ 16, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM5.2", { + { Name{"GeForce GTX 980 "}, Params{ 16, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX TITAN X "}, Params{ 32, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM6.1", { + { Name{"GeForce GTX 1070 "}, Params{ 16, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1080 "}, Params{ 16, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"TITAN X (Pascal) "}, Params{ 8, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 16, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // QUALCOMM GPUs + kDeviceTypeGPU, "QUALCOMM", { + { "default", { + { Name{"QUALCOMM Adreno(TM) "}, Params{ 16, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 8, 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/transpose/transpose_64.hpp b/src/database/kernels/transpose/transpose_64.hpp new file mode 100644 index 00000000..ad806d21 --- /dev/null +++ b/src/database/kernels/transpose/transpose_64.hpp @@ -0,0 +1,131 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Transpose64' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry TransposeDouble = { + "Transpose", Precision::kDouble, {"TRA_DIM", "TRA_PAD", "TRA_SHUFFLE", "TRA_WPT"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { + { Name{"AMD Radeon RX 480 "}, Params{ 4, 0, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 4, 0, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Fiji", { + { Name{"AMD Radeon R9 Fury X "}, Params{ 8, 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"AMD Radeon R9 M370X Compute Engine "}, Params{ 4, 0, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Hawaii", { + { Name{"AMD Radeon R9 290X "}, Params{ 16, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Oland", { + { Name{"Oland "}, Params{ 8, 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Pitcairn", { + { Name{"AMD Radeon R9 270X "}, Params{ 4, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 4, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tahiti", { + { Name{"AMD Radeon HD 7970 "}, Params{ 4, 1, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 4, 1, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tonga", { + { Name{"AMD Radeon R9 380 "}, Params{ 4, 0, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 4, 0, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 4, 0, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "default", { + { Name{"Mali-T628 "}, Params{ 8, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "default", { + { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 4, 1, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 4, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 4, 1, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz "}, Params{ 4, 1, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 4, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz "}, Params{ 4, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 4, 1, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "default", { + { Name{"Intel(R) Many Integrated Core Acceleration Card "}, Params{ 32, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "SM2.0", { + { Name{"GeForce GTX 480 "}, Params{ 8, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.0", { + { Name{"GRID K520 "}, Params{ 16, 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 670 "}, Params{ 16, 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 680 "}, Params{ 16, 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.5", { + { Name{"GeForce GTX TITAN "}, Params{ 8, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX TITAN Black "}, Params{ 16, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Tesla K20m "}, Params{ 16, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Tesla K40m "}, Params{ 16, 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM5.0", { + { Name{"GeForce GTX 750 "}, Params{ 16, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 750 Ti "}, Params{ 32, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM5.2", { + { Name{"GeForce GTX 980 "}, Params{ 16, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX TITAN X "}, Params{ 32, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM6.1", { + { Name{"GeForce GTX 1070 "}, Params{ 8, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1080 "}, Params{ 8, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"TITAN X (Pascal) "}, Params{ 16, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 16, 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 16, 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/transpose/transpose_6464.hpp b/src/database/kernels/transpose/transpose_6464.hpp new file mode 100644 index 00000000..3f59b916 --- /dev/null +++ b/src/database/kernels/transpose/transpose_6464.hpp @@ -0,0 +1,123 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Transpose6464' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry TransposeComplexDouble = { + "Transpose", Precision::kComplexDouble, {"TRA_DIM", "TRA_PAD", "TRA_SHUFFLE", "TRA_WPT"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { + { Name{"AMD Radeon RX 480 "}, Params{ 16, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Fiji", { + { Name{"AMD Radeon R9 Fury X "}, Params{ 16, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"AMD Radeon R9 M370X Compute Engine "}, Params{ 4, 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Hawaii", { + { Name{"AMD Radeon R9 290X "}, Params{ 4, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 4, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Oland", { + { Name{"Oland "}, Params{ 16, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Pitcairn", { + { Name{"AMD Radeon R9 270X "}, Params{ 4, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 4, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tahiti", { + { Name{"AMD Radeon HD 7970 "}, Params{ 16, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tonga", { + { Name{"AMD Radeon R9 380 "}, Params{ 8, 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 16, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "default", { + { Name{"Mali-T628 "}, Params{ 8, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "default", { + { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 4, 0, 1, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 4, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 4, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz "}, Params{ 4, 1, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 4, 0, 1, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz "}, Params{ 16, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 4, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "SM2.0", { + { Name{"GeForce GTX 480 "}, Params{ 8, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.0", { + { Name{"GRID K520 "}, Params{ 16, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 670 "}, Params{ 16, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 680 "}, Params{ 16, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.5", { + { Name{"GeForce GTX TITAN "}, Params{ 16, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX TITAN Black "}, Params{ 16, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Tesla K20m "}, Params{ 16, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Tesla K40m "}, Params{ 16, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM5.0", { + { Name{"GeForce GTX 750 "}, Params{ 16, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 750 Ti "}, Params{ 16, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM5.2", { + { Name{"GeForce GTX 980 "}, Params{ 32, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX TITAN X "}, Params{ 32, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM6.1", { + { Name{"GeForce GTX 1070 "}, Params{ 8, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1080 "}, Params{ 8, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"TITAN X (Pascal) "}, Params{ 8, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 16, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 16, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xaxpy.hpp b/src/database/kernels/xaxpy.hpp deleted file mode 100644 index 5cb225d1..00000000 --- a/src/database/kernels/xaxpy.hpp +++ /dev/null @@ -1,362 +0,0 @@ - -// ================================================================================================= -// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This -// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- -// width of 100 characters per line. -// -// Author(s): -// Database generator <database.py> -// -// This file populates the database with best-found tuning parameters for the 'Xaxpy' kernels. -// -// ================================================================================================= - -namespace clblast { -namespace database { -// ================================================================================================= - -const Database::DatabaseEntry XaxpyHalf = { - "Xaxpy", Precision::kHalf, {"VW", "WGS", "WPT"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "Ellesmere", { 4, 128, 4 } }, - { "default", { 4, 128, 4 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 1, 64, 1 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 8, 64, 1 } }, - { "default", { 8, 64, 1 } }, - } - }, - { // QUALCOMM GPUs - kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 8, 64, 1 } }, - { "default", { 8, 64, 1 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 8, 64, 1 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry XaxpySingle = { - "Xaxpy", Precision::kSingle, {"VW", "WGS", "WPT"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 1, 128, 1 } }, - { "ATI Radeon HD 6750M", { 1, 256, 2 } }, - { "Ellesmere", { 1, 64, 4 } }, - { "Fiji", { 4, 64, 1 } }, - { "Hawaii", { 2, 64, 2 } }, - { "Oland", { 1, 128, 1 } }, - { "Pitcairn", { 2, 128, 1 } }, - { "Tahiti", { 2, 64, 1 } }, - { "Tonga", { 1, 256, 8 } }, - { "Turks", { 2, 256, 1 } }, - { "default", { 2, 256, 1 } }, - } - }, - { // ARM GPUs - kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 4, 256, 1 } }, - { "default", { 4, 256, 1 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 8, 512, 1 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1, 512, 1 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 1, 128, 2 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 4, 256, 1 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 2, 1024, 1 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 1, 128, 1 } }, - { "default", { 8, 512, 1 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 530", { 1, 128, 1 } }, - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 1, 256, 1 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 1, 64, 1 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 1, 64, 1 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 8, 512, 1 } }, - { "Iris", { 1, 64, 1 } }, - { "Iris Pro", { 1, 128, 2 } }, - { "default", { 4, 256, 1 } }, - } - }, - { // Intel accelerators - kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 2, 1024, 2 } }, - { "default", { 2, 1024, 2 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 2, 64, 1 } }, - { "GeForce GT 650M", { 2, 1024, 1 } }, - { "GeForce GTX 1070", { 1, 64, 4 } }, - { "GeForce GTX 1080", { 1, 256, 1 } }, - { "GeForce GTX 480", { 2, 128, 1 } }, - { "GeForce GTX 670", { 2, 64, 1 } }, - { "GeForce GTX 680", { 1, 128, 1 } }, - { "GeForce GTX 750", { 1, 64, 1 } }, - { "GeForce GTX 750 Ti", { 2, 64, 1 } }, - { "GeForce GTX 980", { 1, 1024, 1 } }, - { "GeForce GTX TITAN", { 4, 256, 1 } }, - { "GeForce GTX TITAN Black", { 4, 128, 4 } }, - { "GeForce GTX TITAN X", { 1, 64, 1 } }, - { "TITAN X (Pascal)", { 4, 128, 1 } }, - { "Tesla K20m", { 4, 128, 1 } }, - { "Tesla K40m", { 4, 128, 1 } }, - { "default", { 4, 1024, 1 } }, - } - }, - { // QUALCOMM GPUs - kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 4, 128, 2 } }, - { "default", { 4, 128, 2 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 4, 64, 1 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry XaxpyComplexSingle = { - "Xaxpy", Precision::kComplexSingle, {"VW", "WGS", "WPT"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 2, 64, 8 } }, - { "ATI Radeon HD 6750M", { 1, 64, 1 } }, - { "Ellesmere", { 2, 256, 1 } }, - { "Fiji", { 1, 128, 2 } }, - { "Hawaii", { 1, 128, 2 } }, - { "Oland", { 1, 128, 1 } }, - { "Pitcairn", { 1, 64, 1 } }, - { "Tahiti", { 1, 64, 1 } }, - { "Tonga", { 1, 256, 8 } }, - { "Turks", { 2, 256, 1 } }, - { "default", { 1, 128, 1 } }, - } - }, - { // ARM GPUs - kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 1, 256, 1 } }, - { "default", { 1, 256, 1 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 1024, 1 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 4, 256, 1 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 4, 1024, 1 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 1, 1024, 2 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 1024, 1 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 2, 1024, 1 } }, - { "default", { 8, 1024, 1 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 530", { 4, 64, 2 } }, - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 1, 64, 1 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 1, 64, 1 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 1, 64, 1 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 4, 64, 1 } }, - { "Iris", { 2, 128, 1 } }, - { "Iris Pro", { 1, 256, 8 } }, - { "default", { 4, 64, 1 } }, - } - }, - { // Intel accelerators - kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 1, 1024, 1 } }, - { "default", { 1, 1024, 1 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 1, 512, 1 } }, - { "GeForce GTX 1070", { 1, 64, 2 } }, - { "GeForce GTX 1080", { 2, 64, 1 } }, - { "GeForce GTX 480", { 1, 256, 1 } }, - { "GeForce GTX 670", { 1, 256, 1 } }, - { "GeForce GTX 680", { 1, 256, 1 } }, - { "GeForce GTX 750", { 1, 512, 1 } }, - { "GeForce GTX 750 Ti", { 1, 512, 1 } }, - { "GeForce GTX 980", { 1, 64, 1 } }, - { "GeForce GTX TITAN", { 1, 256, 1 } }, - { "GeForce GTX TITAN Black", { 1, 128, 2 } }, - { "GeForce GTX TITAN X", { 1, 512, 1 } }, - { "TITAN X (Pascal)", { 2, 512, 1 } }, - { "Tesla K20m", { 1, 128, 1 } }, - { "Tesla K40m", { 1, 128, 1 } }, - { "default", { 1, 256, 1 } }, - } - }, - { // QUALCOMM GPUs - kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 1, 64, 1 } }, - { "default", { 1, 64, 1 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 1, 128, 1 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry XaxpyDouble = { - "Xaxpy", Precision::kDouble, {"VW", "WGS", "WPT"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 1, 256, 1 } }, - { "Ellesmere", { 2, 64, 4 } }, - { "Fiji", { 2, 64, 4 } }, - { "Hawaii", { 1, 64, 2 } }, - { "Oland", { 1, 64, 1 } }, - { "Pitcairn", { 1, 128, 1 } }, - { "Tahiti", { 1, 64, 1 } }, - { "Tonga", { 1, 128, 4 } }, - { "default", { 2, 64, 1 } }, - } - }, - { // ARM GPUs - kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 2, 128, 2 } }, - { "default", { 2, 128, 2 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 64, 1 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1, 1024, 1 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 2, 1024, 1 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 8, 64, 1 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 8, 256, 1 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 8, 2048, 1 } }, - { "default", { 8, 64, 1 } }, - } - }, - { // Intel accelerators - kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 2, 512, 1 } }, - { "default", { 2, 512, 1 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 1, 64, 1 } }, - { "GeForce GTX 1070", { 1, 64, 8 } }, - { "GeForce GTX 1080", { 1, 128, 1 } }, - { "GeForce GTX 480", { 1, 128, 1 } }, - { "GeForce GTX 670", { 1, 64, 1 } }, - { "GeForce GTX 680", { 1, 64, 1 } }, - { "GeForce GTX 750", { 1, 128, 1 } }, - { "GeForce GTX 750 Ti", { 1, 256, 2 } }, - { "GeForce GTX 980", { 1, 256, 1 } }, - { "GeForce GTX TITAN", { 2, 1024, 1 } }, - { "GeForce GTX TITAN Black", { 2, 128, 1 } }, - { "GeForce GTX TITAN X", { 1, 512, 1 } }, - { "TITAN X (Pascal)", { 2, 512, 1 } }, - { "Tesla K20m", { 2, 128, 1 } }, - { "Tesla K40m", { 2, 128, 1 } }, - { "default", { 1, 128, 1 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 2, 256, 1 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry XaxpyComplexDouble = { - "Xaxpy", Precision::kComplexDouble, {"VW", "WGS", "WPT"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 1, 128, 1 } }, - { "Ellesmere", { 1, 128, 1 } }, - { "Fiji", { 1, 64, 1 } }, - { "Hawaii", { 2, 64, 1 } }, - { "Oland", { 1, 256, 1 } }, - { "Pitcairn", { 1, 128, 1 } }, - { "Tahiti", { 1, 128, 1 } }, - { "Tonga", { 1, 64, 1 } }, - { "default", { 1, 128, 1 } }, - } - }, - { // ARM GPUs - kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 1, 64, 8 } }, - { "default", { 1, 64, 8 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 1024, 1 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 8, 128, 1 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 8, 128, 1 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 8, 512, 1 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 8, 1024, 1 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 1, 256, 1 } }, - { "default", { 8, 256, 1 } }, - } - }, - { // Intel accelerators - kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 1, 1024, 1 } }, - { "default", { 1, 1024, 1 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 1, 64, 1 } }, - { "GeForce GTX 1070", { 1, 64, 2 } }, - { "GeForce GTX 1080", { 1, 256, 1 } }, - { "GeForce GTX 480", { 1, 128, 1 } }, - { "GeForce GTX 670", { 1, 256, 1 } }, - { "GeForce GTX 680", { 1, 64, 1 } }, - { "GeForce GTX 750", { 1, 1024, 1 } }, - { "GeForce GTX 750 Ti", { 1, 64, 2 } }, - { "GeForce GTX 980", { 1, 1024, 1 } }, - { "GeForce GTX TITAN", { 1, 64, 4 } }, - { "GeForce GTX TITAN Black", { 1, 128, 4 } }, - { "GeForce GTX TITAN X", { 1, 1024, 1 } }, - { "TITAN X (Pascal)", { 1, 256, 2 } }, - { "Tesla K20m", { 1, 64, 1 } }, - { "Tesla K40m", { 1, 64, 1 } }, - { "default", { 1, 64, 1 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 1, 256, 1 } }, - } - }, - } -}; - -// ================================================================================================= -} // namespace database -} // namespace clblast diff --git a/src/database/kernels/xaxpy/xaxpy.hpp b/src/database/kernels/xaxpy/xaxpy.hpp new file mode 100644 index 00000000..aa920183 --- /dev/null +++ b/src/database/kernels/xaxpy/xaxpy.hpp @@ -0,0 +1,14 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xaxpy' kernels. +// +// ================================================================================================= + +#include "database/kernels/xaxpy/xaxpy_16.hpp" +#include "database/kernels/xaxpy/xaxpy_32.hpp" +#include "database/kernels/xaxpy/xaxpy_3232.hpp" +#include "database/kernels/xaxpy/xaxpy_64.hpp" +#include "database/kernels/xaxpy/xaxpy_6464.hpp" diff --git a/src/database/kernels/xaxpy/xaxpy_16.hpp b/src/database/kernels/xaxpy/xaxpy_16.hpp new file mode 100644 index 00000000..d102bb52 --- /dev/null +++ b/src/database/kernels/xaxpy/xaxpy_16.hpp @@ -0,0 +1,54 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xaxpy16' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XaxpyHalf = { + "Xaxpy", Precision::kHalf, {"VW", "WGS", "WPT"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { + { Name{"AMD Radeon RX 480 "}, Params{ 4, 128, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 4, 128, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 4, 128, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 8, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // QUALCOMM GPUs + kDeviceTypeGPU, "QUALCOMM", { + { "default", { + { Name{"QUALCOMM Adreno(TM) "}, Params{ 8, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 8, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xaxpy/xaxpy_32.hpp b/src/database/kernels/xaxpy/xaxpy_32.hpp new file mode 100644 index 00000000..38d595db --- /dev/null +++ b/src/database/kernels/xaxpy/xaxpy_32.hpp @@ -0,0 +1,162 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xaxpy32' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XaxpySingle = { + "Xaxpy", Precision::kSingle, {"VW", "WGS", "WPT"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { + { Name{"AMD Radeon RX 480 "}, Params{ 1, 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Fiji", { + { Name{"AMD Radeon R9 Fury X "}, Params{ 4, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"AMD Radeon R9 M370X Compute Engine "}, Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 4, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Hawaii", { + { Name{"AMD Radeon R9 290X "}, Params{ 2, 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Oland", { + { Name{"Oland "}, Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Pitcairn", { + { Name{"AMD Radeon R9 270X "}, Params{ 2, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tahiti", { + { Name{"AMD Radeon HD 7970 "}, Params{ 2, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tonga", { + { Name{"AMD Radeon R9 380 "}, Params{ 1, 256, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 256, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Turks", { + { Name{"AMD Radeon HD 6770M "}, Params{ 2, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Vancouver", { + { Name{"ATI Radeon HD 6750M "}, Params{ 1, 256, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 256, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 2, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "default", { + { Name{"Mali-T628 "}, Params{ 4, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 4, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "default", { + { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 8, 512, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 1, 512, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 1, 128, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz "}, Params{ 4, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 2, 1024, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz "}, Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 512, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics 530 "}, Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics IvyBridge M GT2 "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 8, 512, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Iris "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Iris Pro "}, Params{ 1, 128, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 4, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "default", { + { Name{"Intel(R) Many Integrated Core Acceleration Card "}, Params{ 2, 1024, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 1024, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "SM2.0", { + { Name{"GeForce GTX 480 "}, Params{ 2, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.0", { + { Name{"GRID K520 "}, Params{ 2, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GT 650M "}, Params{ 2, 1024, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 670 "}, Params{ 2, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 680 "}, Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 1024, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.5", { + { Name{"GeForce GTX TITAN "}, Params{ 4, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX TITAN Black "}, Params{ 4, 128, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Tesla K20m "}, Params{ 4, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Tesla K40m "}, Params{ 4, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 4, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM5.0", { + { Name{"GeForce GTX 750 "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 750 Ti "}, Params{ 2, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM5.2", { + { Name{"GeForce GTX 980 "}, Params{ 1, 1024, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX TITAN X "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM6.1", { + { Name{"GeForce GTX 1070 "}, Params{ 1, 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1080 "}, Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"TITAN X (Pascal) "}, Params{ 4, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 512, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 4, 1024, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // QUALCOMM GPUs + kDeviceTypeGPU, "QUALCOMM", { + { "default", { + { Name{"QUALCOMM Adreno(TM) "}, Params{ 4, 128, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 4, 128, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 4, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xaxpy/xaxpy_3232.hpp b/src/database/kernels/xaxpy/xaxpy_3232.hpp new file mode 100644 index 00000000..e002c521 --- /dev/null +++ b/src/database/kernels/xaxpy/xaxpy_3232.hpp @@ -0,0 +1,161 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xaxpy3232' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XaxpyComplexSingle = { + "Xaxpy", Precision::kComplexSingle, {"VW", "WGS", "WPT"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { + { Name{"AMD Radeon RX 480 "}, Params{ 2, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Fiji", { + { Name{"AMD Radeon R9 Fury X "}, Params{ 1, 128, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"AMD Radeon R9 M370X Compute Engine "}, Params{ 2, 64, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 64, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Hawaii", { + { Name{"AMD Radeon R9 290X "}, Params{ 1, 128, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 128, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Oland", { + { Name{"Oland "}, Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Pitcairn", { + { Name{"AMD Radeon R9 270X "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tahiti", { + { Name{"AMD Radeon HD 7970 "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tonga", { + { Name{"AMD Radeon R9 380 "}, Params{ 1, 256, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 256, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Turks", { + { Name{"AMD Radeon HD 6770M "}, Params{ 2, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Vancouver", { + { Name{"ATI Radeon HD 6750M "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "default", { + { Name{"Mali-T628 "}, Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "default", { + { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 4, 1024, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 4, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 4, 1024, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz "}, Params{ 1, 1024, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 4, 1024, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz "}, Params{ 2, 1024, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 1024, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics 530 "}, Params{ 4, 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics IvyBridge M GT2 "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 4, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Iris "}, Params{ 2, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Iris Pro "}, Params{ 1, 256, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 4, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "default", { + { Name{"Intel(R) Many Integrated Core Acceleration Card "}, Params{ 1, 1024, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 1024, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "SM2.0", { + { Name{"GeForce GTX 480 "}, Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.0", { + { Name{"GRID K520 "}, Params{ 1, 512, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 670 "}, Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 680 "}, Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.5", { + { Name{"GeForce GTX TITAN "}, Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX TITAN Black "}, Params{ 1, 128, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Tesla K20m "}, Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Tesla K40m "}, Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM5.0", { + { Name{"GeForce GTX 750 "}, Params{ 1, 512, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 750 Ti "}, Params{ 1, 512, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 512, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM5.2", { + { Name{"GeForce GTX 980 "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX TITAN X "}, Params{ 1, 512, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 512, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM6.1", { + { Name{"GeForce GTX 1070 "}, Params{ 1, 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1080 "}, Params{ 2, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"TITAN X (Pascal) "}, Params{ 2, 512, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // QUALCOMM GPUs + kDeviceTypeGPU, "QUALCOMM", { + { "default", { + { Name{"QUALCOMM Adreno(TM) "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xaxpy/xaxpy_64.hpp b/src/database/kernels/xaxpy/xaxpy_64.hpp new file mode 100644 index 00000000..51196633 --- /dev/null +++ b/src/database/kernels/xaxpy/xaxpy_64.hpp @@ -0,0 +1,131 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xaxpy64' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XaxpyDouble = { + "Xaxpy", Precision::kDouble, {"VW", "WGS", "WPT"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { + { Name{"AMD Radeon RX 480 "}, Params{ 2, 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Fiji", { + { Name{"AMD Radeon R9 Fury X "}, Params{ 2, 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"AMD Radeon R9 M370X Compute Engine "}, Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Hawaii", { + { Name{"AMD Radeon R9 290X "}, Params{ 1, 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Oland", { + { Name{"Oland "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Pitcairn", { + { Name{"AMD Radeon R9 270X "}, Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tahiti", { + { Name{"AMD Radeon HD 7970 "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tonga", { + { Name{"AMD Radeon R9 380 "}, Params{ 1, 128, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 128, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 2, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "default", { + { Name{"Mali-T628 "}, Params{ 2, 128, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 128, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "default", { + { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 4, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 1, 1024, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 2, 1024, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz "}, Params{ 8, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 8, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz "}, Params{ 8, 2048, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "default", { + { Name{"Intel(R) Many Integrated Core Acceleration Card "}, Params{ 2, 512, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 512, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "SM2.0", { + { Name{"GeForce GTX 480 "}, Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.0", { + { Name{"GRID K520 "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 670 "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 680 "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.5", { + { Name{"GeForce GTX TITAN "}, Params{ 2, 1024, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX TITAN Black "}, Params{ 2, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Tesla K20m "}, Params{ 2, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Tesla K40m "}, Params{ 2, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM5.0", { + { Name{"GeForce GTX 750 "}, Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 750 Ti "}, Params{ 1, 256, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM5.2", { + { Name{"GeForce GTX 980 "}, Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX TITAN X "}, Params{ 1, 512, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 512, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM6.1", { + { Name{"GeForce GTX 1070 "}, Params{ 1, 64, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1080 "}, Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"TITAN X (Pascal) "}, Params{ 2, 512, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 2, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xaxpy/xaxpy_6464.hpp b/src/database/kernels/xaxpy/xaxpy_6464.hpp new file mode 100644 index 00000000..5249fe84 --- /dev/null +++ b/src/database/kernels/xaxpy/xaxpy_6464.hpp @@ -0,0 +1,131 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xaxpy6464' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XaxpyComplexDouble = { + "Xaxpy", Precision::kComplexDouble, {"VW", "WGS", "WPT"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { + { Name{"AMD Radeon RX 480 "}, Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Fiji", { + { Name{"AMD Radeon R9 Fury X "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"AMD Radeon R9 M370X Compute Engine "}, Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Hawaii", { + { Name{"AMD Radeon R9 290X "}, Params{ 2, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Oland", { + { Name{"Oland "}, Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Pitcairn", { + { Name{"AMD Radeon R9 270X "}, Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tahiti", { + { Name{"AMD Radeon HD 7970 "}, Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tonga", { + { Name{"AMD Radeon R9 380 "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "default", { + { Name{"Mali-T628 "}, Params{ 1, 64, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 64, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "default", { + { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 4, 1024, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 8, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 8, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz "}, Params{ 8, 512, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 8, 1024, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz "}, Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "default", { + { Name{"Intel(R) Many Integrated Core Acceleration Card "}, Params{ 1, 1024, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 1024, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "SM2.0", { + { Name{"GeForce GTX 480 "}, Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.0", { + { Name{"GRID K520 "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 670 "}, Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 680 "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.5", { + { Name{"GeForce GTX TITAN "}, Params{ 1, 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX TITAN Black "}, Params{ 1, 128, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Tesla K20m "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Tesla K40m "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM5.0", { + { Name{"GeForce GTX 750 "}, Params{ 1, 1024, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 750 Ti "}, Params{ 1, 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 1024, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM5.2", { + { Name{"GeForce GTX 980 "}, Params{ 1, 1024, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX TITAN X "}, Params{ 1, 1024, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 1024, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM6.1", { + { Name{"GeForce GTX 1070 "}, Params{ 1, 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1080 "}, Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"TITAN X (Pascal) "}, Params{ 1, 256, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 256, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xdot.hpp b/src/database/kernels/xdot.hpp deleted file mode 100644 index 986c32b2..00000000 --- a/src/database/kernels/xdot.hpp +++ /dev/null @@ -1,292 +0,0 @@ - -// ================================================================================================= -// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This -// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- -// width of 100 characters per line. -// -// Author(s): -// Database generator <database.py> -// -// This file populates the database with best-found tuning parameters for the 'Xdot' kernels. -// -// ================================================================================================= - -namespace clblast { -namespace database { -// ================================================================================================= - -const Database::DatabaseEntry XdotHalf = { - "Xdot", Precision::kHalf, {"WGS1", "WGS2"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "Ellesmere", { 256, 64 } }, - { "default", { 256, 64 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 256, 32 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 128, 32 } }, - { "default", { 128, 32 } }, - } - }, - { // QUALCOMM GPUs - kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 64, 64 } }, - { "default", { 64, 64 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 128, 64 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry XdotSingle = { - "Xdot", Precision::kSingle, {"WGS1", "WGS2"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 128, 32 } }, - { "ATI Radeon HD 6750M", { 256, 32 } }, - { "Ellesmere", { 128, 32 } }, - { "Fiji", { 256, 32 } }, - { "Oland", { 256, 32 } }, - { "Pitcairn", { 128, 32 } }, - { "Tahiti", { 128, 32 } }, - { "Tonga", { 64, 32 } }, - { "Turks", { 128, 64 } }, - { "default", { 256, 32 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 32 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1024, 32 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 64, 128 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 64, 32 } }, - { "default", { 64, 64 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 530", { 64, 32 } }, - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 256, 32 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 64, 32 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 512, 128 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 32, 32 } }, - { "Iris Pro", { 512, 64 } }, - { "default", { 64, 32 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 128, 32 } }, - { "GeForce GT 650M", { 128, 64 } }, - { "GeForce GTX 1070", { 128, 1024 } }, - { "GeForce GTX 1080", { 512, 64 } }, - { "GeForce GTX 480", { 512, 32 } }, - { "GeForce GTX 670", { 512, 1024 } }, - { "GeForce GTX 680", { 128, 128 } }, - { "GeForce GTX 750", { 128, 32 } }, - { "GeForce GTX 750 Ti", { 64, 32 } }, - { "GeForce GTX 980", { 256, 32 } }, - { "GeForce GTX TITAN Black", { 512, 64 } }, - { "GeForce GTX TITAN X", { 256, 32 } }, - { "TITAN X (Pascal)", { 1024, 32 } }, - { "Tesla K20m", { 1024, 32 } }, - { "default", { 256, 64 } }, - } - }, - { // QUALCOMM GPUs - kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 128, 64 } }, - { "default", { 128, 64 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 128, 32 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry XdotComplexSingle = { - "Xdot", Precision::kComplexSingle, {"WGS1", "WGS2"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 64, 32 } }, - { "ATI Radeon HD 6750M", { 256, 256 } }, - { "Ellesmere", { 256, 32 } }, - { "Fiji", { 256, 64 } }, - { "Oland", { 128, 32 } }, - { "Pitcairn", { 256, 32 } }, - { "Tahiti", { 64, 32 } }, - { "Tonga", { 256, 64 } }, - { "Turks", { 128, 32 } }, - { "default", { 256, 32 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 128, 64 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1024, 32 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 64, 32 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 256, 32 } }, - { "default", { 256, 32 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 530", { 256, 32 } }, - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 256, 32 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 32, 32 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 512, 32 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 32, 256 } }, - { "Iris Pro", { 32, 32 } }, - { "default", { 32, 32 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 64, 32 } }, - { "GeForce GTX 1070", { 128, 32 } }, - { "GeForce GTX 1080", { 128, 64 } }, - { "GeForce GTX 480", { 512, 32 } }, - { "GeForce GTX 670", { 256, 32 } }, - { "GeForce GTX 680", { 128, 64 } }, - { "GeForce GTX 750", { 64, 32 } }, - { "GeForce GTX 750 Ti", { 64, 32 } }, - { "GeForce GTX 980", { 256, 64 } }, - { "GeForce GTX TITAN Black", { 128, 64 } }, - { "GeForce GTX TITAN X", { 256, 32 } }, - { "TITAN X (Pascal)", { 256, 32 } }, - { "Tesla K20m", { 512, 32 } }, - { "default", { 512, 64 } }, - } - }, - { // QUALCOMM GPUs - kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 64, 256 } }, - { "default", { 64, 256 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 256, 32 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry XdotDouble = { - "Xdot", Precision::kDouble, {"WGS1", "WGS2"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 64, 128 } }, - { "Ellesmere", { 128, 64 } }, - { "Fiji", { 256, 32 } }, - { "Oland", { 256, 32 } }, - { "Pitcairn", { 128, 32 } }, - { "Tahiti", { 256, 32 } }, - { "Tonga", { 128, 64 } }, - { "default", { 128, 64 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 64, 128 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 512, 64 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 64, 64 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 256, 32 } }, - { "default", { 256, 64 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 128, 32 } }, - { "GeForce GTX 1070", { 128, 512 } }, - { "GeForce GTX 1080", { 128, 128 } }, - { "GeForce GTX 480", { 512, 32 } }, - { "GeForce GTX 670", { 256, 32 } }, - { "GeForce GTX 680", { 128, 64 } }, - { "GeForce GTX 750", { 64, 256 } }, - { "GeForce GTX 750 Ti", { 128, 64 } }, - { "GeForce GTX 980", { 128, 32 } }, - { "GeForce GTX TITAN Black", { 128, 64 } }, - { "GeForce GTX TITAN X", { 256, 32 } }, - { "TITAN X (Pascal)", { 128, 32 } }, - { "Tesla K20m", { 512, 32 } }, - { "default", { 128, 128 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 128, 64 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry XdotComplexDouble = { - "Xdot", Precision::kComplexDouble, {"WGS1", "WGS2"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 64, 32 } }, - { "Ellesmere", { 256, 32 } }, - { "Fiji", { 256, 32 } }, - { "Oland", { 256, 32 } }, - { "Pitcairn", { 256, 32 } }, - { "Tahiti", { 256, 32 } }, - { "Tonga", { 128, 64 } }, - { "default", { 256, 32 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 128 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1024, 32 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 1024, 32 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 32 } }, - { "default", { 128, 32 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 64, 32 } }, - { "GeForce GTX 1070", { 128, 64 } }, - { "GeForce GTX 1080", { 128, 32 } }, - { "GeForce GTX 480", { 512, 32 } }, - { "GeForce GTX 670", { 512, 128 } }, - { "GeForce GTX 680", { 256, 64 } }, - { "GeForce GTX 750", { 256, 32 } }, - { "GeForce GTX 750 Ti", { 64, 32 } }, - { "GeForce GTX 980", { 64, 32 } }, - { "GeForce GTX TITAN Black", { 128, 32 } }, - { "GeForce GTX TITAN X", { 128, 32 } }, - { "TITAN X (Pascal)", { 128, 64 } }, - { "Tesla K20m", { 128, 32 } }, - { "default", { 128, 64 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 256, 32 } }, - } - }, - } -}; - -// ================================================================================================= -} // namespace database -} // namespace clblast diff --git a/src/database/kernels/xdot/xdot.hpp b/src/database/kernels/xdot/xdot.hpp new file mode 100644 index 00000000..5d54cdc9 --- /dev/null +++ b/src/database/kernels/xdot/xdot.hpp @@ -0,0 +1,14 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xdot' kernels. +// +// ================================================================================================= + +#include "database/kernels/xdot/xdot_16.hpp" +#include "database/kernels/xdot/xdot_32.hpp" +#include "database/kernels/xdot/xdot_3232.hpp" +#include "database/kernels/xdot/xdot_64.hpp" +#include "database/kernels/xdot/xdot_6464.hpp" diff --git a/src/database/kernels/xdot/xdot_16.hpp b/src/database/kernels/xdot/xdot_16.hpp new file mode 100644 index 00000000..ba9307dd --- /dev/null +++ b/src/database/kernels/xdot/xdot_16.hpp @@ -0,0 +1,54 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xdot16' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XdotHalf = { + "Xdot", Precision::kHalf, {"WGS1", "WGS2"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { + { Name{"AMD Radeon RX 480 "}, Params{ 256, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 256, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // QUALCOMM GPUs + kDeviceTypeGPU, "QUALCOMM", { + { "default", { + { Name{"QUALCOMM Adreno(TM) "}, Params{ 64, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 128, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xdot/xdot_32.hpp b/src/database/kernels/xdot/xdot_32.hpp new file mode 100644 index 00000000..933a3d14 --- /dev/null +++ b/src/database/kernels/xdot/xdot_32.hpp @@ -0,0 +1,137 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xdot32' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XdotSingle = { + "Xdot", Precision::kSingle, {"WGS1", "WGS2"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { + { Name{"AMD Radeon RX 480 "}, Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Fiji", { + { Name{"AMD Radeon R9 Fury X "}, Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"AMD Radeon R9 M370X Compute Engine "}, Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Oland", { + { Name{"Oland "}, Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Pitcairn", { + { Name{"AMD Radeon R9 270X "}, Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tahiti", { + { Name{"AMD Radeon HD 7970 "}, Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tonga", { + { Name{"AMD Radeon R9 380 "}, Params{ 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Turks", { + { Name{"AMD Radeon HD 6770M "}, Params{ 128, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Vancouver", { + { Name{"ATI Radeon HD 6750M "}, Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "default", { + { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 1024, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 64, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics 530 "}, Params{ 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile "}, Params{ 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics IvyBridge M GT2 "}, Params{ 512, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Iris Pro "}, Params{ 512, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "SM2.0", { + { Name{"GeForce GTX 480 "}, Params{ 512, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 512, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.0", { + { Name{"GRID K520 "}, Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GT 650M "}, Params{ 128, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 670 "}, Params{ 512, 1024, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 680 "}, Params{ 128, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 1024, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.5", { + { Name{"GeForce GTX TITAN Black "}, Params{ 512, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Tesla K20m "}, Params{ 1024, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1024, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM5.0", { + { Name{"GeForce GTX 750 "}, Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 750 Ti "}, Params{ 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM5.2", { + { Name{"GeForce GTX 980 "}, Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX TITAN X "}, Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM6.1", { + { Name{"GeForce GTX 1070 "}, Params{ 128, 1024, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1080 "}, Params{ 512, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"TITAN X (Pascal) "}, Params{ 1024, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1024, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 256, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // QUALCOMM GPUs + kDeviceTypeGPU, "QUALCOMM", { + { "default", { + { Name{"QUALCOMM Adreno(TM) "}, Params{ 128, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xdot/xdot_3232.hpp b/src/database/kernels/xdot/xdot_3232.hpp new file mode 100644 index 00000000..5ed6baea --- /dev/null +++ b/src/database/kernels/xdot/xdot_3232.hpp @@ -0,0 +1,136 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xdot3232' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XdotComplexSingle = { + "Xdot", Precision::kComplexSingle, {"WGS1", "WGS2"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { + { Name{"AMD Radeon RX 480 "}, Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Fiji", { + { Name{"AMD Radeon R9 Fury X "}, Params{ 256, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"AMD Radeon R9 M370X Compute Engine "}, Params{ 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Oland", { + { Name{"Oland "}, Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Pitcairn", { + { Name{"AMD Radeon R9 270X "}, Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tahiti", { + { Name{"AMD Radeon HD 7970 "}, Params{ 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tonga", { + { Name{"AMD Radeon R9 380 "}, Params{ 256, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Turks", { + { Name{"AMD Radeon HD 6770M "}, Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Vancouver", { + { Name{"ATI Radeon HD 6750M "}, Params{ 256, 256, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 256, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "default", { + { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 128, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 1024, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics 530 "}, Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile "}, Params{ 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics IvyBridge M GT2 "}, Params{ 512, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 32, 256, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Iris Pro "}, Params{ 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "SM2.0", { + { Name{"GeForce GTX 480 "}, Params{ 512, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 512, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.0", { + { Name{"GRID K520 "}, Params{ 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 670 "}, Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 680 "}, Params{ 128, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.5", { + { Name{"GeForce GTX TITAN Black "}, Params{ 128, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Tesla K20m "}, Params{ 512, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM5.0", { + { Name{"GeForce GTX 750 "}, Params{ 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 750 Ti "}, Params{ 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM5.2", { + { Name{"GeForce GTX 980 "}, Params{ 256, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX TITAN X "}, Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM6.1", { + { Name{"GeForce GTX 1070 "}, Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1080 "}, Params{ 128, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"TITAN X (Pascal) "}, Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 512, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // QUALCOMM GPUs + kDeviceTypeGPU, "QUALCOMM", { + { "default", { + { Name{"QUALCOMM Adreno(TM) "}, Params{ 64, 256, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 256, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xdot/xdot_64.hpp b/src/database/kernels/xdot/xdot_64.hpp new file mode 100644 index 00000000..58a67496 --- /dev/null +++ b/src/database/kernels/xdot/xdot_64.hpp @@ -0,0 +1,107 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xdot64' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XdotDouble = { + "Xdot", Precision::kDouble, {"WGS1", "WGS2"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { + { Name{"AMD Radeon RX 480 "}, Params{ 128, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Fiji", { + { Name{"AMD Radeon R9 Fury X "}, Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"AMD Radeon R9 M370X Compute Engine "}, Params{ 64, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Oland", { + { Name{"Oland "}, Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Pitcairn", { + { Name{"AMD Radeon R9 270X "}, Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tahiti", { + { Name{"AMD Radeon HD 7970 "}, Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tonga", { + { Name{"AMD Radeon R9 380 "}, Params{ 128, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 128, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "default", { + { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 64, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 512, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 64, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "SM2.0", { + { Name{"GeForce GTX 480 "}, Params{ 512, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 512, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.0", { + { Name{"GRID K520 "}, Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 670 "}, Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 680 "}, Params{ 128, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.5", { + { Name{"GeForce GTX TITAN Black "}, Params{ 128, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Tesla K20m "}, Params{ 512, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM5.0", { + { Name{"GeForce GTX 750 "}, Params{ 64, 256, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 750 Ti "}, Params{ 128, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 256, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM5.2", { + { Name{"GeForce GTX 980 "}, Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX TITAN X "}, Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM6.1", { + { Name{"GeForce GTX 1070 "}, Params{ 128, 512, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1080 "}, Params{ 128, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"TITAN X (Pascal) "}, Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 128, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 128, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xdot/xdot_6464.hpp b/src/database/kernels/xdot/xdot_6464.hpp new file mode 100644 index 00000000..755ec390 --- /dev/null +++ b/src/database/kernels/xdot/xdot_6464.hpp @@ -0,0 +1,107 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xdot6464' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XdotComplexDouble = { + "Xdot", Precision::kComplexDouble, {"WGS1", "WGS2"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { + { Name{"AMD Radeon RX 480 "}, Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Fiji", { + { Name{"AMD Radeon R9 Fury X "}, Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"AMD Radeon R9 M370X Compute Engine "}, Params{ 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Oland", { + { Name{"Oland "}, Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Pitcairn", { + { Name{"AMD Radeon R9 270X "}, Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tahiti", { + { Name{"AMD Radeon HD 7970 "}, Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tonga", { + { Name{"AMD Radeon R9 380 "}, Params{ 128, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "default", { + { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 32, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 1024, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 1024, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "SM2.0", { + { Name{"GeForce GTX 480 "}, Params{ 512, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 512, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.0", { + { Name{"GRID K520 "}, Params{ 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 670 "}, Params{ 512, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 680 "}, Params{ 256, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.5", { + { Name{"GeForce GTX TITAN Black "}, Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Tesla K20m "}, Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM5.0", { + { Name{"GeForce GTX 750 "}, Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 750 Ti "}, Params{ 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM5.2", { + { Name{"GeForce GTX 980 "}, Params{ 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX TITAN X "}, Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM6.1", { + { Name{"GeForce GTX 1070 "}, Params{ 128, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1080 "}, Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"TITAN X (Pascal) "}, Params{ 128, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 256, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 128, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xgemm.hpp b/src/database/kernels/xgemm.hpp deleted file mode 100644 index 43854afb..00000000 --- a/src/database/kernels/xgemm.hpp +++ /dev/null @@ -1,348 +0,0 @@ - -// ================================================================================================= -// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This -// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- -// width of 100 characters per line. -// -// Author(s): -// Database generator <database.py> -// -// This file populates the database with best-found tuning parameters for the 'Xgemm' kernels. -// -// ================================================================================================= - -namespace clblast { -namespace database { -// ================================================================================================= - -const Database::DatabaseEntry XgemmHalf = { - "Xgemm", Precision::kHalf, {"KWG", "KWI", "MDIMA", "MDIMC", "MWG", "NDIMB", "NDIMC", "NWG", "SA", "SB", "STRM", "STRN", "VWM", "VWN"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "Ellesmere", { 32, 2, 8, 8, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } }, - { "default", { 32, 2, 8, 8, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics Skylake ULT GT2", { 32, 2, 8, 8, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } }, - { "default", { 32, 2, 8, 8, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 32, 2, 8, 8, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry XgemmSingle = { - "Xgemm", Precision::kSingle, {"KWG", "KWI", "MDIMA", "MDIMC", "MWG", "NDIMB", "NDIMC", "NWG", "SA", "SB", "STRM", "STRN", "VWM", "VWN"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 32, 2, 16, 16, 64, 8, 16, 128, 0, 0, 0, 0, 2, 8 } }, - { "ATI Radeon HD 6750M", { 32, 2, 8, 16, 128, 8, 8, 128, 0, 0, 1, 1, 8, 8 } }, - { "Ellesmere", { 32, 2, 8, 8, 16, 16, 16, 64, 1, 1, 0, 0, 1, 2 } }, - { "Fiji", { 32, 2, 16, 16, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } }, - { "Hawaii", { 16, 2, 16, 32, 128, 32, 8, 64, 1, 1, 1, 1, 4, 2 } }, - { "Oland", { 16, 2, 32, 16, 64, 32, 16, 128, 1, 1, 1, 0, 2, 4 } }, - { "Pitcairn", { 16, 2, 16, 8, 32, 16, 16, 128, 0, 0, 1, 0, 1, 1 } }, - { "Tahiti", { 32, 2, 16, 32, 128, 16, 8, 64, 0, 0, 0, 0, 4, 1 } }, - { "Tonga", { 16, 2, 16, 32, 64, 16, 8, 128, 1, 1, 0, 0, 2, 8 } }, - { "Turks", { 32, 2, 8, 8, 64, 8, 8, 64, 0, 0, 0, 0, 4, 4 } }, - { "default", { 32, 2, 8, 8, 32, 8, 8, 64, 0, 0, 0, 0, 4, 4 } }, - } - }, - { // ARM GPUs - kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 16, 2, 8, 8, 64, 8, 16, 16, 0, 0, 1, 1, 8, 1 } }, - { "default", { 16, 2, 8, 8, 64, 8, 16, 16, 0, 0, 1, 1, 8, 1 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 16, 2, 8, 8, 128, 16, 8, 128, 0, 1, 1, 1, 1, 8 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 8, 32, 32, 64, 32, 16, 64, 1, 1, 1, 0, 2, 2 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 2, 16, 8, 128, 16, 8, 64, 0, 0, 1, 0, 1, 2 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 2, 32, 8, 128, 8, 8, 128, 1, 1, 1, 1, 2, 8 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 16, 2, 8, 8, 128, 8, 8, 128, 1, 1, 1, 0, 1, 8 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 8, 16, 16, 64, 32, 32, 64, 0, 1, 1, 0, 1, 2 } }, - { "default", { 32, 2, 8, 8, 32, 8, 8, 64, 1, 1, 0, 0, 4, 4 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 530", { 32, 2, 8, 8, 128, 32, 16, 64, 0, 0, 1, 0, 4, 2 } }, - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 32, 8, 8, 8, 64, 32, 16, 64, 1, 1, 1, 1, 4, 2 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 16, 2, 16, 8, 32, 8, 16, 128, 1, 1, 1, 1, 2, 4 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 32, 2, 16, 16, 64, 16, 8, 64, 1, 1, 1, 0, 2, 4 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 4, 4 } }, - { "Iris", { 16, 8, 16, 8, 128, 32, 16, 64, 1, 1, 1, 1, 4, 1 } }, - { "Iris Pro", { 16, 2, 16, 8, 64, 32, 32, 128, 1, 1, 1, 0, 4, 4 } }, - { "default", { 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 4, 4 } }, - } - }, - { // Intel accelerators - kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 32, 2, 32, 32, 32, 32, 8, 128, 0, 0, 1, 0, 1, 4 } }, - { "default", { 32, 2, 32, 32, 32, 32, 8, 128, 0, 0, 1, 0, 1, 4 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 16, 2, 16, 8, 32, 8, 16, 64, 1, 1, 1, 1, 2, 4 } }, - { "GeForce GT 650M", { 32, 2, 8, 8, 32, 32, 32, 64, 1, 1, 0, 0, 4, 2 } }, - { "GeForce GTX 1070", { 16, 2, 32, 16, 128, 32, 8, 128, 1, 1, 1, 0, 4, 1 } }, - { "GeForce GTX 1080", { 32, 2, 16, 8, 64, 8, 8, 64, 1, 1, 1, 1, 4, 8 } }, - { "GeForce GTX 480", { 16, 2, 16, 8, 64, 32, 16, 64, 1, 1, 1, 1, 2, 2 } }, - { "GeForce GTX 670", { 16, 2, 8, 8, 64, 16, 16, 64, 1, 1, 1, 0, 2, 4 } }, - { "GeForce GTX 680", { 32, 8, 8, 16, 64, 32, 16, 128, 1, 1, 0, 0, 4, 2 } }, - { "GeForce GTX 750", { 16, 2, 16, 16, 64, 32, 8, 128, 1, 1, 1, 1, 1, 2 } }, - { "GeForce GTX 750 Ti", { 16, 2, 16, 16, 128, 32, 8, 64, 1, 1, 0, 1, 8, 2 } }, - { "GeForce GTX 980", { 16, 2, 16, 16, 64, 16, 8, 128, 1, 1, 1, 0, 4, 8 } }, - { "GeForce GTX TITAN", { 16, 8, 32, 16, 64, 8, 8, 64, 1, 1, 1, 0, 2, 2 } }, - { "GeForce GTX TITAN Black", { 16, 2, 16, 8, 64, 16, 16, 64, 1, 1, 1, 0, 4, 1 } }, - { "GeForce GTX TITAN X", { 16, 2, 8, 16, 128, 8, 8, 128, 1, 1, 1, 1, 4, 8 } }, - { "TITAN X (Pascal)", { 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 4, 1 } }, - { "Tesla K20m", { 16, 2, 32, 16, 64, 16, 8, 64, 1, 1, 1, 0, 2, 4 } }, - { "Tesla K40m", { 16, 8, 16, 8, 64, 16, 16, 128, 1, 1, 1, 0, 2, 4 } }, - { "default", { 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 4, 2 } }, - } - }, - { // QUALCOMM GPUs - kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 32, 2, 8, 8, 32, 8, 8, 32, 1, 1, 0, 0, 4, 1 } }, - { "default", { 32, 2, 8, 8, 32, 8, 8, 32, 1, 1, 0, 0, 4, 1 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 32, 2, 8, 8, 32, 8, 8, 32, 1, 1, 0, 0, 4, 2 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry XgemmComplexSingle = { - "Xgemm", Precision::kComplexSingle, {"KWG", "KWI", "MDIMA", "MDIMC", "MWG", "NDIMB", "NDIMC", "NWG", "SA", "SB", "STRM", "STRN", "VWM", "VWN"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 32, 2, 32, 32, 64, 8, 8, 64, 0, 0, 1, 1, 2, 8 } }, - { "ATI Radeon HD 6750M", { 32, 2, 8, 8, 32, 16, 16, 64, 1, 1, 0, 0, 4, 4 } }, - { "Ellesmere", { 32, 2, 16, 16, 32, 8, 8, 32, 1, 1, 0, 0, 1, 4 } }, - { "Fiji", { 32, 2, 16, 16, 32, 16, 16, 32, 1, 1, 0, 0, 1, 2 } }, - { "Hawaii", { 32, 2, 32, 8, 32, 8, 16, 32, 1, 0, 1, 0, 1, 1 } }, - { "Oland", { 32, 2, 16, 8, 32, 32, 32, 128, 1, 0, 0, 1, 2, 4 } }, - { "Pitcairn", { 16, 2, 8, 8, 32, 8, 8, 32, 0, 1, 1, 1, 4, 2 } }, - { "Tahiti", { 16, 2, 8, 8, 32, 8, 16, 32, 1, 0, 0, 1, 2, 1 } }, - { "Tonga", { 16, 2, 32, 8, 64, 16, 32, 64, 1, 1, 1, 0, 2, 1 } }, - { "Turks", { 16, 2, 8, 8, 32, 32, 8, 32, 0, 1, 0, 0, 2, 1 } }, - { "default", { 32, 2, 16, 16, 32, 16, 16, 32, 1, 1, 0, 0, 1, 2 } }, - } - }, - { // ARM GPUs - kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 16, 2, 8, 16, 128, 16, 8, 128, 0, 0, 0, 1, 8, 1 } }, - { "default", { 16, 2, 8, 16, 128, 16, 8, 128, 0, 0, 0, 1, 8, 1 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 16, 2, 32, 8, 128, 16, 16, 128, 1, 1, 0, 1, 1, 2 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 2, 32, 16, 32, 16, 16, 64, 0, 1, 1, 0, 1, 2 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 2, 16, 16, 64, 8, 16, 64, 0, 1, 0, 0, 4, 4 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 2, 8, 8, 128, 16, 32, 128, 0, 0, 0, 0, 1, 4 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 2, 8, 8, 128, 32, 8, 128, 0, 0, 0, 0, 1, 4 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 2, 8, 16, 16, 16, 16, 128, 0, 0, 1, 1, 1, 4 } }, - { "default", { 32, 2, 16, 16, 64, 8, 8, 32, 0, 0, 0, 0, 4, 2 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 530", { 16, 8, 8, 16, 64, 32, 8, 32, 0, 0, 0, 0, 2, 1 } }, - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 16, 8, 8, 8, 32, 16, 16, 64, 1, 0, 0, 0, 4, 4 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 32, 8, 16, 16, 64, 16, 16, 64, 1, 1, 1, 1, 2, 1 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 32, 2, 16, 16, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 32, 2, 16, 16, 64, 16, 16, 64, 0, 0, 0, 0, 4, 2 } }, - { "Iris", { 32, 8, 32, 16, 64, 8, 16, 64, 1, 0, 1, 0, 1, 1 } }, - { "Iris Pro", { 16, 2, 8, 8, 32, 32, 8, 32, 1, 1, 1, 1, 1, 1 } }, - { "default", { 32, 2, 8, 8, 32, 8, 8, 32, 1, 1, 0, 0, 4, 2 } }, - } - }, - { // Intel accelerators - kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 32, 2, 32, 32, 32, 32, 16, 128, 1, 0, 0, 0, 1, 4 } }, - { "default", { 32, 2, 32, 32, 32, 32, 16, 128, 1, 0, 0, 0, 1, 4 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 16, 8, 32, 32, 64, 32, 16, 128, 1, 0, 1, 0, 1, 4 } }, - { "GeForce GTX 1070", { 16, 2, 16, 16, 128, 16, 16, 64, 1, 1, 1, 1, 2, 4 } }, - { "GeForce GTX 1080", { 16, 2, 32, 16, 64, 32, 8, 64, 1, 1, 0, 0, 1, 2 } }, - { "GeForce GTX 480", { 16, 2, 16, 16, 32, 32, 16, 128, 0, 1, 1, 1, 2, 2 } }, - { "GeForce GTX 670", { 16, 2, 32, 32, 64, 32, 8, 32, 1, 1, 1, 1, 1, 1 } }, - { "GeForce GTX 680", { 16, 2, 32, 16, 64, 32, 32, 128, 1, 0, 0, 0, 2, 2 } }, - { "GeForce GTX 750", { 16, 8, 16, 16, 64, 16, 16, 64, 1, 1, 1, 0, 2, 2 } }, - { "GeForce GTX 750 Ti", { 16, 2, 16, 8, 32, 32, 16, 64, 1, 1, 1, 0, 1, 2 } }, - { "GeForce GTX 980", { 32, 8, 32, 32, 64, 16, 16, 64, 1, 1, 1, 0, 2, 1 } }, - { "GeForce GTX TITAN", { 16, 8, 16, 16, 64, 32, 16, 64, 1, 1, 1, 0, 1, 1 } }, - { "GeForce GTX TITAN Black", { 16, 2, 8, 16, 64, 8, 8, 32, 0, 1, 1, 0, 1, 2 } }, - { "GeForce GTX TITAN X", { 16, 2, 8, 8, 64, 8, 8, 32, 1, 0, 1, 1, 1, 4 } }, - { "TITAN X (Pascal)", { 32, 2, 32, 32, 64, 8, 8, 32, 1, 1, 0, 0, 2, 4 } }, - { "Tesla K20m", { 32, 2, 8, 16, 64, 8, 16, 64, 1, 0, 0, 0, 1, 4 } }, - { "Tesla K40m", { 16, 2, 32, 32, 32, 32, 8, 64, 0, 1, 0, 0, 1, 1 } }, - { "default", { 32, 2, 8, 8, 16, 32, 32, 64, 1, 1, 0, 0, 1, 1 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 32, 2, 16, 16, 32, 8, 8, 32, 1, 1, 0, 0, 2, 1 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry XgemmDouble = { - "Xgemm", Precision::kDouble, {"KWG", "KWI", "MDIMA", "MDIMC", "MWG", "NDIMB", "NDIMC", "NWG", "SA", "SB", "STRM", "STRN", "VWM", "VWN"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 32, 2, 16, 16, 64, 8, 8, 32, 0, 0, 0, 0, 4, 4 } }, - { "Ellesmere", { 32, 2, 16, 16, 32, 16, 16, 64, 1, 1, 0, 0, 2, 2 } }, - { "Fiji", { 32, 2, 16, 16, 32, 16, 16, 32, 1, 1, 0, 0, 2, 2 } }, - { "Hawaii", { 16, 8, 32, 8, 128, 8, 8, 32, 0, 1, 0, 0, 1, 4 } }, - { "Oland", { 16, 2, 8, 16, 64, 16, 8, 16, 0, 0, 1, 1, 1, 1 } }, - { "Pitcairn", { 32, 2, 32, 16, 64, 8, 16, 32, 0, 0, 0, 0, 1, 2 } }, - { "Tahiti", { 32, 2, 16, 8, 16, 8, 8, 32, 0, 0, 0, 1, 1, 4 } }, - { "Tonga", { 32, 2, 16, 16, 32, 16, 16, 32, 1, 1, 0, 0, 2, 2 } }, - { "default", { 32, 2, 16, 16, 32, 16, 16, 32, 1, 1, 0, 0, 2, 2 } }, - } - }, - { // ARM GPUs - kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 32, 2, 8, 8, 64, 8, 8, 16, 0, 1, 1, 0, 8, 2 } }, - { "default", { 32, 2, 8, 8, 64, 8, 8, 16, 0, 1, 1, 0, 8, 2 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 16, 2, 32, 8, 128, 16, 16, 128, 1, 1, 1, 1, 2, 8 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 2, 32, 16, 128, 16, 16, 64, 0, 1, 1, 0, 1, 2 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 2, 32, 16, 128, 16, 16, 128, 0, 0, 1, 0, 1, 2 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 2, 16, 8, 128, 8, 8, 64, 1, 0, 0, 1, 2, 8 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 2, 16, 8, 128, 8, 8, 128, 1, 0, 0, 0, 2, 8 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 2, 8, 16, 128, 16, 8, 128, 0, 0, 1, 1, 1, 8 } }, - { "default", { 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 1, 4 } }, - } - }, - { // Intel accelerators - kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 32, 8, 8, 16, 16, 16, 16, 128, 0, 0, 1, 0, 1, 4 } }, - { "default", { 32, 8, 8, 16, 16, 16, 16, 128, 0, 0, 1, 0, 1, 4 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 16, 2, 8, 8, 16, 8, 8, 32, 1, 0, 0, 1, 2, 2 } }, - { "GeForce GTX 1070", { 16, 2, 8, 16, 32, 8, 8, 64, 0, 0, 1, 1, 2, 8 } }, - { "GeForce GTX 1080", { 32, 2, 16, 16, 32, 16, 16, 64, 0, 0, 0, 0, 2, 4 } }, - { "GeForce GTX 480", { 16, 2, 8, 16, 32, 32, 8, 64, 1, 1, 1, 0, 1, 2 } }, - { "GeForce GTX 670", { 32, 8, 16, 32, 128, 16, 8, 32, 0, 1, 1, 0, 1, 1 } }, - { "GeForce GTX 680", { 32, 8, 8, 8, 32, 16, 32, 128, 1, 0, 0, 1, 2, 4 } }, - { "GeForce GTX 750", { 32, 8, 16, 32, 64, 16, 8, 128, 0, 0, 0, 1, 2, 1 } }, - { "GeForce GTX 750 Ti", { 32, 2, 8, 8, 32, 16, 16, 32, 0, 0, 0, 0, 4, 2 } }, - { "GeForce GTX 980", { 32, 8, 16, 8, 64, 32, 32, 128, 0, 0, 1, 0, 2, 4 } }, - { "GeForce GTX TITAN", { 16, 8, 16, 8, 32, 16, 32, 128, 1, 1, 1, 1, 2, 2 } }, - { "GeForce GTX TITAN Black", { 16, 2, 16, 8, 16, 16, 8, 16, 1, 1, 1, 0, 1, 1 } }, - { "GeForce GTX TITAN X", { 16, 8, 16, 16, 16, 16, 16, 16, 0, 0, 0, 0, 1, 1 } }, - { "TITAN X (Pascal)", { 32, 2, 32, 32, 32, 16, 16, 32, 0, 0, 0, 0, 1, 2 } }, - { "Tesla K20m", { 16, 2, 32, 8, 32, 16, 16, 64, 1, 0, 0, 0, 1, 1 } }, - { "Tesla K40m", { 32, 2, 16, 8, 64, 16, 32, 128, 1, 0, 1, 1, 2, 4 } }, - { "default", { 32, 2, 16, 16, 32, 16, 16, 64, 0, 0, 0, 0, 2, 4 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 32, 2, 8, 8, 32, 8, 8, 64, 0, 0, 0, 0, 4, 4 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry XgemmComplexDouble = { - "Xgemm", Precision::kComplexDouble, {"KWG", "KWI", "MDIMA", "MDIMC", "MWG", "NDIMB", "NDIMC", "NWG", "SA", "SB", "STRM", "STRN", "VWM", "VWN"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 32, 8, 8, 16, 32, 16, 16, 32, 0, 0, 1, 1, 2, 2 } }, - { "Ellesmere", { 32, 2, 16, 16, 16, 16, 16, 16, 1, 1, 0, 0, 1, 1 } }, - { "Fiji", { 32, 2, 16, 16, 16, 16, 16, 16, 1, 1, 0, 0, 1, 1 } }, - { "Hawaii", { 16, 2, 16, 16, 16, 16, 16, 32, 1, 0, 0, 0, 1, 2 } }, - { "Oland", { 16, 2, 16, 8, 16, 16, 32, 128, 0, 0, 0, 0, 1, 4 } }, - { "Pitcairn", { 32, 2, 16, 8, 32, 8, 32, 32, 0, 1, 1, 0, 1, 1 } }, - { "Tahiti", { 16, 2, 16, 8, 16, 8, 8, 16, 0, 0, 1, 0, 1, 1 } }, - { "Tonga", { 16, 2, 32, 16, 32, 16, 16, 16, 1, 1, 1, 1, 1, 1 } }, - { "default", { 32, 2, 16, 16, 16, 16, 16, 16, 1, 1, 0, 0, 1, 1 } }, - } - }, - { // ARM GPUs - kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 16, 2, 8, 8, 64, 32, 8, 64, 0, 0, 1, 0, 8, 1 } }, - { "default", { 16, 2, 8, 8, 64, 32, 8, 64, 0, 0, 1, 0, 8, 1 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 16, 2, 32, 8, 64, 16, 8, 128, 0, 1, 0, 1, 2, 1 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 2, 16, 32, 128, 16, 16, 64, 0, 1, 0, 0, 2, 4 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 2, 16, 32, 128, 16, 8, 32, 0, 1, 0, 0, 4, 1 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 2, 8, 8, 128, 8, 16, 128, 0, 0, 0, 1, 1, 8 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 2, 8, 8, 128, 32, 8, 128, 0, 0, 0, 0, 1, 4 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 8, 8, 32, 32, 8, 8, 32, 0, 1, 0, 0, 1, 2 } }, - { "default", { 32, 2, 8, 8, 16, 8, 8, 32, 1, 1, 0, 0, 1, 2 } }, - } - }, - { // Intel accelerators - kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 32, 2, 16, 16, 16, 16, 8, 32, 0, 0, 1, 0, 1, 1 } }, - { "default", { 32, 2, 16, 16, 16, 16, 8, 32, 0, 0, 1, 0, 1, 1 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 32, 8, 16, 16, 16, 8, 16, 64, 1, 0, 1, 1, 1, 1 } }, - { "GeForce GTX 1070", { 32, 8, 32, 16, 32, 8, 8, 32, 0, 0, 0, 1, 1, 4 } }, - { "GeForce GTX 1080", { 32, 2, 16, 16, 16, 8, 8, 16, 0, 0, 0, 0, 1, 2 } }, - { "GeForce GTX 480", { 16, 2, 32, 32, 32, 32, 8, 32, 0, 0, 1, 0, 1, 1 } }, - { "GeForce GTX 670", { 32, 8, 16, 8, 16, 16, 32, 64, 1, 0, 0, 1, 1, 2 } }, - { "GeForce GTX 680", { 16, 8, 16, 8, 64, 16, 32, 32, 0, 1, 1, 0, 1, 1 } }, - { "GeForce GTX 750", { 32, 2, 8, 32, 32, 8, 8, 64, 0, 0, 1, 0, 1, 4 } }, - { "GeForce GTX 750 Ti", { 32, 2, 8, 8, 16, 8, 8, 32, 0, 0, 0, 0, 1, 1 } }, - { "GeForce GTX 980", { 16, 2, 16, 8, 32, 8, 16, 128, 0, 0, 1, 1, 2, 2 } }, - { "GeForce GTX TITAN Black", { 16, 2, 16, 16, 32, 16, 8, 32, 0, 1, 1, 1, 1, 1 } }, - { "GeForce GTX TITAN X", { 32, 8, 16, 16, 128, 16, 16, 32, 0, 0, 1, 0, 1, 1 } }, - { "TITAN X (Pascal)", { 32, 2, 16, 16, 16, 16, 16, 16, 0, 0, 0, 0, 1, 1 } }, - { "Tesla K20m", { 32, 2, 32, 8, 32, 16, 16, 64, 0, 0, 1, 0, 1, 1 } }, - { "Tesla K40m", { 16, 8, 8, 8, 32, 32, 16, 32, 0, 0, 1, 0, 1, 1 } }, - { "default", { 32, 2, 16, 16, 32, 16, 16, 32, 0, 0, 0, 0, 1, 1 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 32, 2, 32, 32, 32, 8, 8, 32, 1, 1, 0, 0, 1, 1 } }, - } - }, - } -}; - -// ================================================================================================= -} // namespace database -} // namespace clblast diff --git a/src/database/kernels/xgemm/xgemm.hpp b/src/database/kernels/xgemm/xgemm.hpp new file mode 100644 index 00000000..d5d382f0 --- /dev/null +++ b/src/database/kernels/xgemm/xgemm.hpp @@ -0,0 +1,14 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xgemm' kernels. +// +// ================================================================================================= + +#include "database/kernels/xgemm/xgemm_16.hpp" +#include "database/kernels/xgemm/xgemm_32.hpp" +#include "database/kernels/xgemm/xgemm_3232.hpp" +#include "database/kernels/xgemm/xgemm_64.hpp" +#include "database/kernels/xgemm/xgemm_6464.hpp" diff --git a/src/database/kernels/xgemm/xgemm_16.hpp b/src/database/kernels/xgemm/xgemm_16.hpp new file mode 100644 index 00000000..32562415 --- /dev/null +++ b/src/database/kernels/xgemm/xgemm_16.hpp @@ -0,0 +1,45 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xgemm16' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XgemmHalf = { + "Xgemm", Precision::kHalf, {"KWG", "KWI", "MDIMA", "MDIMC", "MWG", "NDIMB", "NDIMC", "NWG", "SA", "SB", "STRM", "STRN", "VWM", "VWN"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { + { Name{"AMD Radeon RX 480 "}, Params{ 32, 2, 8, 8, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } }, + { kDeviceNameDefault , Params{ 32, 2, 8, 8, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 32, 2, 8, 8, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } }, + } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 32, 2, 8, 8, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } }, + { kDeviceNameDefault , Params{ 32, 2, 8, 8, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 32, 2, 8, 8, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xgemm/xgemm_32.hpp b/src/database/kernels/xgemm/xgemm_32.hpp new file mode 100644 index 00000000..26428c2f --- /dev/null +++ b/src/database/kernels/xgemm/xgemm_32.hpp @@ -0,0 +1,162 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xgemm32' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XgemmSingle = { + "Xgemm", Precision::kSingle, {"KWG", "KWI", "MDIMA", "MDIMC", "MWG", "NDIMB", "NDIMC", "NWG", "SA", "SB", "STRM", "STRN", "VWM", "VWN"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { + { Name{"AMD Radeon RX 480 "}, Params{ 32, 2, 8, 8, 16, 16, 16, 64, 1, 1, 0, 0, 1, 2 } }, + { kDeviceNameDefault , Params{ 32, 2, 8, 8, 16, 16, 16, 64, 1, 1, 0, 0, 1, 2 } }, + } }, + { "Fiji", { + { Name{"AMD Radeon R9 Fury X "}, Params{ 32, 2, 16, 16, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } }, + { Name{"AMD Radeon R9 M370X Compute Engine "}, Params{ 32, 2, 16, 16, 64, 8, 16, 128, 0, 0, 0, 0, 2, 8 } }, + { kDeviceNameDefault , Params{ 32, 2, 16, 16, 64, 16, 16, 64, 1, 1, 0, 0, 2, 2 } }, + } }, + { "Hawaii", { + { Name{"AMD Radeon R9 290X "}, Params{ 16, 2, 16, 32, 128, 32, 8, 64, 1, 1, 1, 1, 4, 2 } }, + { kDeviceNameDefault , Params{ 16, 2, 16, 32, 128, 32, 8, 64, 1, 1, 1, 1, 4, 2 } }, + } }, + { "Oland", { + { Name{"Oland "}, Params{ 16, 2, 32, 16, 64, 32, 16, 128, 1, 1, 1, 0, 2, 4 } }, + { kDeviceNameDefault , Params{ 16, 2, 32, 16, 64, 32, 16, 128, 1, 1, 1, 0, 2, 4 } }, + } }, + { "Pitcairn", { + { Name{"AMD Radeon R9 270X "}, Params{ 16, 2, 16, 8, 32, 16, 16, 128, 0, 0, 1, 0, 1, 1 } }, + { kDeviceNameDefault , Params{ 16, 2, 16, 8, 32, 16, 16, 128, 0, 0, 1, 0, 1, 1 } }, + } }, + { "Tahiti", { + { Name{"AMD Radeon HD 7970 "}, Params{ 32, 2, 16, 32, 128, 16, 8, 64, 0, 0, 0, 0, 4, 1 } }, + { kDeviceNameDefault , Params{ 32, 2, 16, 32, 128, 16, 8, 64, 0, 0, 0, 0, 4, 1 } }, + } }, + { "Tonga", { + { Name{"AMD Radeon R9 380 "}, Params{ 16, 2, 16, 32, 64, 16, 8, 128, 1, 1, 0, 0, 2, 8 } }, + { kDeviceNameDefault , Params{ 16, 2, 16, 32, 64, 16, 8, 128, 1, 1, 0, 0, 2, 8 } }, + } }, + { "Turks", { + { Name{"AMD Radeon HD 6770M "}, Params{ 32, 2, 8, 8, 64, 8, 8, 64, 0, 0, 0, 0, 4, 4 } }, + { kDeviceNameDefault , Params{ 32, 2, 8, 8, 64, 8, 8, 64, 0, 0, 0, 0, 4, 4 } }, + } }, + { "Vancouver", { + { Name{"ATI Radeon HD 6750M "}, Params{ 32, 2, 8, 16, 128, 8, 8, 128, 0, 0, 1, 1, 8, 8 } }, + { kDeviceNameDefault , Params{ 32, 2, 8, 16, 128, 8, 8, 128, 0, 0, 1, 1, 8, 8 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 32, 2, 8, 8, 32, 8, 8, 64, 0, 0, 0, 0, 4, 4 } }, + } }, + } + }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "default", { + { Name{"Mali-T628 "}, Params{ 16, 2, 8, 8, 64, 8, 16, 16, 0, 0, 1, 1, 8, 1 } }, + { kDeviceNameDefault , Params{ 16, 2, 8, 8, 64, 8, 16, 16, 0, 0, 1, 1, 8, 1 } }, + } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "default", { + { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 16, 2, 8, 8, 128, 16, 8, 128, 0, 1, 1, 1, 1, 8 } }, + { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 32, 8, 32, 32, 64, 32, 16, 64, 1, 1, 1, 0, 2, 2 } }, + { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 32, 2, 16, 8, 128, 16, 8, 64, 0, 0, 1, 0, 1, 2 } }, + { Name{"Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz "}, Params{ 32, 2, 32, 8, 128, 8, 8, 128, 1, 1, 1, 1, 2, 8 } }, + { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 16, 2, 8, 8, 128, 8, 8, 128, 1, 1, 1, 0, 1, 8 } }, + { Name{"Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz "}, Params{ 32, 8, 16, 16, 64, 32, 32, 64, 0, 1, 1, 0, 1, 2 } }, + { kDeviceNameDefault , Params{ 32, 2, 8, 8, 32, 8, 8, 64, 1, 1, 0, 0, 4, 4 } }, + } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics 530 "}, Params{ 32, 2, 8, 8, 128, 32, 16, 64, 0, 0, 1, 0, 4, 2 } }, + { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 32, 8, 8, 8, 64, 32, 16, 64, 1, 1, 1, 1, 4, 2 } }, + { Name{"Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile "}, Params{ 16, 2, 16, 8, 32, 8, 16, 128, 1, 1, 1, 1, 2, 4 } }, + { Name{"Intel(R) HD Graphics IvyBridge M GT2 "}, Params{ 32, 2, 16, 16, 64, 16, 8, 64, 1, 1, 1, 0, 2, 4 } }, + { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 4, 4 } }, + { Name{"Iris "}, Params{ 16, 8, 16, 8, 128, 32, 16, 64, 1, 1, 1, 1, 4, 1 } }, + { Name{"Iris Pro "}, Params{ 16, 2, 16, 8, 64, 32, 32, 128, 1, 1, 1, 0, 4, 4 } }, + { kDeviceNameDefault , Params{ 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 4, 4 } }, + } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "default", { + { Name{"Intel(R) Many Integrated Core Acceleration Card "}, Params{ 32, 2, 32, 32, 32, 32, 8, 128, 0, 0, 1, 0, 1, 4 } }, + { kDeviceNameDefault , Params{ 32, 2, 32, 32, 32, 32, 8, 128, 0, 0, 1, 0, 1, 4 } }, + } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "SM2.0", { + { Name{"GeForce GTX 480 "}, Params{ 16, 2, 16, 8, 64, 32, 16, 64, 1, 1, 1, 1, 2, 2 } }, + { kDeviceNameDefault , Params{ 16, 2, 16, 8, 64, 32, 16, 64, 1, 1, 1, 1, 2, 2 } }, + } }, + { "SM3.0", { + { Name{"GRID K520 "}, Params{ 16, 2, 16, 8, 32, 8, 16, 64, 1, 1, 1, 1, 2, 4 } }, + { Name{"GeForce GT 650M "}, Params{ 32, 2, 8, 8, 32, 32, 32, 64, 1, 1, 0, 0, 4, 2 } }, + { Name{"GeForce GTX 670 "}, Params{ 16, 2, 8, 8, 64, 16, 16, 64, 1, 1, 1, 0, 2, 4 } }, + { Name{"GeForce GTX 680 "}, Params{ 32, 8, 8, 16, 64, 32, 16, 128, 1, 1, 0, 0, 4, 2 } }, + { kDeviceNameDefault , Params{ 32, 2, 8, 8, 32, 8, 8, 32, 1, 1, 0, 0, 4, 4 } }, + } }, + { "SM3.5", { + { Name{"GeForce GTX TITAN "}, Params{ 16, 8, 32, 16, 64, 8, 8, 64, 1, 1, 1, 0, 2, 2 } }, + { Name{"GeForce GTX TITAN Black "}, Params{ 16, 2, 16, 8, 64, 16, 16, 64, 1, 1, 1, 0, 4, 1 } }, + { Name{"Tesla K20m "}, Params{ 16, 2, 32, 16, 64, 16, 8, 64, 1, 1, 1, 0, 2, 4 } }, + { Name{"Tesla K40m "}, Params{ 16, 8, 16, 8, 64, 16, 16, 128, 1, 1, 1, 0, 2, 4 } }, + { kDeviceNameDefault , Params{ 16, 8, 32, 16, 64, 32, 16, 64, 1, 0, 1, 0, 2, 2 } }, + } }, + { "SM5.0", { + { Name{"GeForce GTX 750 "}, Params{ 16, 2, 16, 16, 64, 32, 8, 128, 1, 1, 1, 1, 1, 2 } }, + { Name{"GeForce GTX 750 Ti "}, Params{ 16, 2, 16, 16, 128, 32, 8, 64, 1, 1, 0, 1, 8, 2 } }, + { kDeviceNameDefault , Params{ 32, 2, 8, 8, 64, 32, 32, 64, 0, 0, 0, 0, 2, 1 } }, + } }, + { "SM5.2", { + { Name{"GeForce GTX 980 "}, Params{ 16, 2, 16, 16, 64, 16, 8, 128, 1, 1, 1, 0, 4, 8 } }, + { Name{"GeForce GTX TITAN X "}, Params{ 16, 2, 8, 16, 128, 8, 8, 128, 1, 1, 1, 1, 4, 8 } }, + { kDeviceNameDefault , Params{ 16, 2, 16, 16, 128, 16, 8, 128, 1, 1, 1, 0, 4, 8 } }, + } }, + { "SM6.1", { + { Name{"GeForce GTX 1070 "}, Params{ 16, 2, 32, 16, 128, 32, 8, 128, 1, 1, 1, 0, 4, 1 } }, + { Name{"GeForce GTX 1080 "}, Params{ 32, 2, 16, 8, 64, 8, 8, 64, 1, 1, 1, 1, 4, 8 } }, + { Name{"TITAN X (Pascal) "}, Params{ 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 4, 1 } }, + { kDeviceNameDefault , Params{ 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 4, 1 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 4, 2 } }, + } }, + } + }, + { // QUALCOMM GPUs + kDeviceTypeGPU, "QUALCOMM", { + { "default", { + { Name{"QUALCOMM Adreno(TM) "}, Params{ 32, 2, 8, 8, 32, 8, 8, 32, 1, 1, 0, 0, 4, 1 } }, + { kDeviceNameDefault , Params{ 32, 2, 8, 8, 32, 8, 8, 32, 1, 1, 0, 0, 4, 1 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 32, 2, 8, 8, 32, 8, 8, 32, 1, 1, 0, 0, 4, 2 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xgemm/xgemm_3232.hpp b/src/database/kernels/xgemm/xgemm_3232.hpp new file mode 100644 index 00000000..9b713f36 --- /dev/null +++ b/src/database/kernels/xgemm/xgemm_3232.hpp @@ -0,0 +1,153 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xgemm3232' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XgemmComplexSingle = { + "Xgemm", Precision::kComplexSingle, {"KWG", "KWI", "MDIMA", "MDIMC", "MWG", "NDIMB", "NDIMC", "NWG", "SA", "SB", "STRM", "STRN", "VWM", "VWN"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { + { Name{"AMD Radeon RX 480 "}, Params{ 32, 2, 16, 16, 32, 8, 8, 32, 1, 1, 0, 0, 1, 4 } }, + { kDeviceNameDefault , Params{ 32, 2, 16, 16, 32, 8, 8, 32, 1, 1, 0, 0, 1, 4 } }, + } }, + { "Fiji", { + { Name{"AMD Radeon R9 Fury X "}, Params{ 32, 2, 16, 16, 32, 16, 16, 32, 1, 1, 0, 0, 1, 2 } }, + { Name{"AMD Radeon R9 M370X Compute Engine "}, Params{ 32, 2, 32, 32, 64, 8, 8, 64, 0, 0, 1, 1, 2, 8 } }, + { kDeviceNameDefault , Params{ 32, 2, 8, 8, 16, 32, 32, 32, 1, 1, 0, 0, 1, 1 } }, + } }, + { "Hawaii", { + { Name{"AMD Radeon R9 290X "}, Params{ 32, 2, 32, 8, 32, 8, 16, 32, 1, 0, 1, 0, 1, 1 } }, + { kDeviceNameDefault , Params{ 32, 2, 32, 8, 32, 8, 16, 32, 1, 0, 1, 0, 1, 1 } }, + } }, + { "Oland", { + { Name{"Oland "}, Params{ 32, 2, 16, 8, 32, 32, 32, 128, 1, 0, 0, 1, 2, 4 } }, + { kDeviceNameDefault , Params{ 32, 2, 16, 8, 32, 32, 32, 128, 1, 0, 0, 1, 2, 4 } }, + } }, + { "Pitcairn", { + { Name{"AMD Radeon R9 270X "}, Params{ 16, 2, 8, 8, 32, 8, 8, 32, 0, 1, 1, 1, 4, 2 } }, + { kDeviceNameDefault , Params{ 16, 2, 8, 8, 32, 8, 8, 32, 0, 1, 1, 1, 4, 2 } }, + } }, + { "Tahiti", { + { Name{"AMD Radeon HD 7970 "}, Params{ 16, 2, 8, 8, 32, 8, 16, 32, 1, 0, 0, 1, 2, 1 } }, + { kDeviceNameDefault , Params{ 16, 2, 8, 8, 32, 8, 16, 32, 1, 0, 0, 1, 2, 1 } }, + } }, + { "Tonga", { + { Name{"AMD Radeon R9 380 "}, Params{ 16, 2, 32, 8, 64, 16, 32, 64, 1, 1, 1, 0, 2, 1 } }, + { kDeviceNameDefault , Params{ 16, 2, 32, 8, 64, 16, 32, 64, 1, 1, 1, 0, 2, 1 } }, + } }, + { "Turks", { + { Name{"AMD Radeon HD 6770M "}, Params{ 16, 2, 8, 8, 32, 32, 8, 32, 0, 1, 0, 0, 2, 1 } }, + { kDeviceNameDefault , Params{ 16, 2, 8, 8, 32, 32, 8, 32, 0, 1, 0, 0, 2, 1 } }, + } }, + { "Vancouver", { + { Name{"ATI Radeon HD 6750M "}, Params{ 32, 2, 8, 8, 32, 16, 16, 64, 1, 1, 0, 0, 4, 4 } }, + { kDeviceNameDefault , Params{ 32, 2, 8, 8, 32, 16, 16, 64, 1, 1, 0, 0, 4, 4 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 32, 2, 16, 16, 32, 16, 16, 32, 1, 1, 0, 0, 1, 2 } }, + } }, + } + }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "default", { + { Name{"Mali-T628 "}, Params{ 16, 2, 8, 16, 128, 16, 8, 128, 0, 0, 0, 1, 8, 1 } }, + { kDeviceNameDefault , Params{ 16, 2, 8, 16, 128, 16, 8, 128, 0, 0, 0, 1, 8, 1 } }, + } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "default", { + { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 16, 2, 32, 8, 128, 16, 16, 128, 1, 1, 0, 1, 1, 2 } }, + { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 32, 2, 32, 16, 32, 16, 16, 64, 0, 1, 1, 0, 1, 2 } }, + { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 32, 2, 16, 16, 64, 8, 16, 64, 0, 1, 0, 0, 4, 4 } }, + { Name{"Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz "}, Params{ 32, 2, 8, 8, 128, 16, 32, 128, 0, 0, 0, 0, 1, 4 } }, + { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 32, 2, 8, 8, 128, 32, 8, 128, 0, 0, 0, 0, 1, 4 } }, + { Name{"Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz "}, Params{ 32, 2, 8, 16, 16, 16, 16, 128, 0, 0, 1, 1, 1, 4 } }, + { kDeviceNameDefault , Params{ 32, 2, 16, 16, 64, 8, 8, 32, 0, 0, 0, 0, 4, 2 } }, + } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics 530 "}, Params{ 16, 8, 8, 16, 64, 32, 8, 32, 0, 0, 0, 0, 2, 1 } }, + { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 16, 8, 8, 8, 32, 16, 16, 64, 1, 0, 0, 0, 4, 4 } }, + { Name{"Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile "}, Params{ 32, 8, 16, 16, 64, 16, 16, 64, 1, 1, 1, 1, 2, 1 } }, + { Name{"Intel(R) HD Graphics IvyBridge M GT2 "}, Params{ 32, 2, 16, 16, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } }, + { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 32, 2, 16, 16, 64, 16, 16, 64, 0, 0, 0, 0, 4, 2 } }, + { Name{"Iris "}, Params{ 32, 8, 32, 16, 64, 8, 16, 64, 1, 0, 1, 0, 1, 1 } }, + { Name{"Iris Pro "}, Params{ 16, 2, 8, 8, 32, 32, 8, 32, 1, 1, 1, 1, 1, 1 } }, + { kDeviceNameDefault , Params{ 32, 2, 8, 8, 32, 8, 8, 32, 1, 1, 0, 0, 4, 2 } }, + } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "default", { + { Name{"Intel(R) Many Integrated Core Acceleration Card "}, Params{ 32, 2, 32, 32, 32, 32, 16, 128, 1, 0, 0, 0, 1, 4 } }, + { kDeviceNameDefault , Params{ 32, 2, 32, 32, 32, 32, 16, 128, 1, 0, 0, 0, 1, 4 } }, + } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "SM2.0", { + { Name{"GeForce GTX 480 "}, Params{ 16, 2, 16, 16, 32, 32, 16, 128, 0, 1, 1, 1, 2, 2 } }, + { kDeviceNameDefault , Params{ 16, 2, 16, 16, 32, 32, 16, 128, 0, 1, 1, 1, 2, 2 } }, + } }, + { "SM3.0", { + { Name{"GRID K520 "}, Params{ 16, 8, 32, 32, 64, 32, 16, 128, 1, 0, 1, 0, 1, 4 } }, + { Name{"GeForce GTX 670 "}, Params{ 16, 2, 32, 32, 64, 32, 8, 32, 1, 1, 1, 1, 1, 1 } }, + { Name{"GeForce GTX 680 "}, Params{ 16, 2, 32, 16, 64, 32, 32, 128, 1, 0, 0, 0, 2, 2 } }, + { kDeviceNameDefault , Params{ 16, 2, 32, 16, 64, 32, 16, 128, 1, 0, 0, 0, 1, 1 } }, + } }, + { "SM3.5", { + { Name{"GeForce GTX TITAN "}, Params{ 16, 8, 16, 16, 64, 32, 16, 64, 1, 1, 1, 0, 1, 1 } }, + { Name{"GeForce GTX TITAN Black "}, Params{ 16, 2, 8, 16, 64, 8, 8, 32, 0, 1, 1, 0, 1, 2 } }, + { Name{"Tesla K20m "}, Params{ 32, 2, 8, 16, 64, 8, 16, 64, 1, 0, 0, 0, 1, 4 } }, + { Name{"Tesla K40m "}, Params{ 16, 2, 32, 32, 32, 32, 8, 64, 0, 1, 0, 0, 1, 1 } }, + { kDeviceNameDefault , Params{ 32, 2, 16, 16, 128, 8, 8, 64, 0, 1, 0, 1, 8, 2 } }, + } }, + { "SM5.0", { + { Name{"GeForce GTX 750 "}, Params{ 16, 8, 16, 16, 64, 16, 16, 64, 1, 1, 1, 0, 2, 2 } }, + { Name{"GeForce GTX 750 Ti "}, Params{ 16, 2, 16, 8, 32, 32, 16, 64, 1, 1, 1, 0, 1, 2 } }, + { kDeviceNameDefault , Params{ 16, 2, 16, 16, 32, 16, 16, 64, 1, 1, 1, 0, 1, 2 } }, + } }, + { "SM5.2", { + { Name{"GeForce GTX 980 "}, Params{ 32, 8, 32, 32, 64, 16, 16, 64, 1, 1, 1, 0, 2, 1 } }, + { Name{"GeForce GTX TITAN X "}, Params{ 16, 2, 8, 8, 64, 8, 8, 32, 1, 0, 1, 1, 1, 4 } }, + { kDeviceNameDefault , Params{ 16, 2, 32, 32, 64, 16, 16, 32, 1, 0, 1, 0, 1, 1 } }, + } }, + { "SM6.1", { + { Name{"GeForce GTX 1070 "}, Params{ 16, 2, 16, 16, 128, 16, 16, 64, 1, 1, 1, 1, 2, 4 } }, + { Name{"GeForce GTX 1080 "}, Params{ 16, 2, 32, 16, 64, 32, 8, 64, 1, 1, 0, 0, 1, 2 } }, + { Name{"TITAN X (Pascal) "}, Params{ 32, 2, 32, 32, 64, 8, 8, 32, 1, 1, 0, 0, 2, 4 } }, + { kDeviceNameDefault , Params{ 32, 2, 16, 16, 32, 16, 16, 64, 1, 1, 0, 0, 2, 4 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 32, 2, 8, 8, 16, 32, 32, 64, 1, 1, 0, 0, 1, 1 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 32, 2, 16, 16, 32, 8, 8, 32, 1, 1, 0, 0, 2, 1 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xgemm/xgemm_64.hpp b/src/database/kernels/xgemm/xgemm_64.hpp new file mode 100644 index 00000000..cfb78deb --- /dev/null +++ b/src/database/kernels/xgemm/xgemm_64.hpp @@ -0,0 +1,131 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xgemm64' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XgemmDouble = { + "Xgemm", Precision::kDouble, {"KWG", "KWI", "MDIMA", "MDIMC", "MWG", "NDIMB", "NDIMC", "NWG", "SA", "SB", "STRM", "STRN", "VWM", "VWN"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { + { Name{"AMD Radeon RX 480 "}, Params{ 32, 2, 16, 16, 32, 16, 16, 64, 1, 1, 0, 0, 2, 2 } }, + { kDeviceNameDefault , Params{ 32, 2, 16, 16, 32, 16, 16, 64, 1, 1, 0, 0, 2, 2 } }, + } }, + { "Fiji", { + { Name{"AMD Radeon R9 Fury X "}, Params{ 32, 2, 16, 16, 32, 16, 16, 32, 1, 1, 0, 0, 2, 2 } }, + { Name{"AMD Radeon R9 M370X Compute Engine "}, Params{ 32, 2, 16, 16, 64, 8, 8, 32, 0, 0, 0, 0, 4, 4 } }, + { kDeviceNameDefault , Params{ 32, 2, 8, 8, 16, 32, 32, 32, 1, 1, 0, 0, 1, 1 } }, + } }, + { "Hawaii", { + { Name{"AMD Radeon R9 290X "}, Params{ 16, 8, 32, 8, 128, 8, 8, 32, 0, 1, 0, 0, 1, 4 } }, + { kDeviceNameDefault , Params{ 16, 8, 32, 8, 128, 8, 8, 32, 0, 1, 0, 0, 1, 4 } }, + } }, + { "Oland", { + { Name{"Oland "}, Params{ 16, 2, 8, 16, 64, 16, 8, 16, 0, 0, 1, 1, 1, 1 } }, + { kDeviceNameDefault , Params{ 16, 2, 8, 16, 64, 16, 8, 16, 0, 0, 1, 1, 1, 1 } }, + } }, + { "Pitcairn", { + { Name{"AMD Radeon R9 270X "}, Params{ 32, 2, 32, 16, 64, 8, 16, 32, 0, 0, 0, 0, 1, 2 } }, + { kDeviceNameDefault , Params{ 32, 2, 32, 16, 64, 8, 16, 32, 0, 0, 0, 0, 1, 2 } }, + } }, + { "Tahiti", { + { Name{"AMD Radeon HD 7970 "}, Params{ 32, 2, 16, 8, 16, 8, 8, 32, 0, 0, 0, 1, 1, 4 } }, + { kDeviceNameDefault , Params{ 32, 2, 16, 8, 16, 8, 8, 32, 0, 0, 0, 1, 1, 4 } }, + } }, + { "Tonga", { + { Name{"AMD Radeon R9 380 "}, Params{ 32, 2, 16, 16, 32, 16, 16, 32, 1, 1, 0, 0, 2, 2 } }, + { kDeviceNameDefault , Params{ 32, 2, 16, 16, 32, 16, 16, 32, 1, 1, 0, 0, 2, 2 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 32, 2, 16, 16, 32, 16, 16, 32, 1, 1, 0, 0, 2, 2 } }, + } }, + } + }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "default", { + { Name{"Mali-T628 "}, Params{ 32, 2, 8, 8, 64, 8, 8, 16, 0, 1, 1, 0, 8, 2 } }, + { kDeviceNameDefault , Params{ 32, 2, 8, 8, 64, 8, 8, 16, 0, 1, 1, 0, 8, 2 } }, + } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "default", { + { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 16, 2, 32, 8, 128, 16, 16, 128, 1, 1, 1, 1, 2, 8 } }, + { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 32, 2, 32, 16, 128, 16, 16, 64, 0, 1, 1, 0, 1, 2 } }, + { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 32, 2, 32, 16, 128, 16, 16, 128, 0, 0, 1, 0, 1, 2 } }, + { Name{"Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz "}, Params{ 32, 2, 16, 8, 128, 8, 8, 64, 1, 0, 0, 1, 2, 8 } }, + { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 32, 2, 16, 8, 128, 8, 8, 128, 1, 0, 0, 0, 2, 8 } }, + { Name{"Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz "}, Params{ 32, 2, 8, 16, 128, 16, 8, 128, 0, 0, 1, 1, 1, 8 } }, + { kDeviceNameDefault , Params{ 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 1, 4 } }, + } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "default", { + { Name{"Intel(R) Many Integrated Core Acceleration Card "}, Params{ 32, 8, 8, 16, 16, 16, 16, 128, 0, 0, 1, 0, 1, 4 } }, + { kDeviceNameDefault , Params{ 32, 8, 8, 16, 16, 16, 16, 128, 0, 0, 1, 0, 1, 4 } }, + } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "SM2.0", { + { Name{"GeForce GTX 480 "}, Params{ 16, 2, 8, 16, 32, 32, 8, 64, 1, 1, 1, 0, 1, 2 } }, + { kDeviceNameDefault , Params{ 16, 2, 8, 16, 32, 32, 8, 64, 1, 1, 1, 0, 1, 2 } }, + } }, + { "SM3.0", { + { Name{"GRID K520 "}, Params{ 16, 2, 8, 8, 16, 8, 8, 32, 1, 0, 0, 1, 2, 2 } }, + { Name{"GeForce GTX 670 "}, Params{ 32, 8, 16, 32, 128, 16, 8, 32, 0, 1, 1, 0, 1, 1 } }, + { Name{"GeForce GTX 680 "}, Params{ 32, 8, 8, 8, 32, 16, 32, 128, 1, 0, 0, 1, 2, 4 } }, + { kDeviceNameDefault , Params{ 16, 2, 16, 32, 128, 16, 32, 128, 0, 0, 0, 0, 1, 1 } }, + } }, + { "SM3.5", { + { Name{"GeForce GTX TITAN "}, Params{ 16, 8, 16, 8, 32, 16, 32, 128, 1, 1, 1, 1, 2, 2 } }, + { Name{"GeForce GTX TITAN Black "}, Params{ 16, 2, 16, 8, 16, 16, 8, 16, 1, 1, 1, 0, 1, 1 } }, + { Name{"Tesla K20m "}, Params{ 16, 2, 32, 8, 32, 16, 16, 64, 1, 0, 0, 0, 1, 1 } }, + { Name{"Tesla K40m "}, Params{ 32, 2, 16, 8, 64, 16, 32, 128, 1, 0, 1, 1, 2, 4 } }, + { kDeviceNameDefault , Params{ 16, 2, 16, 8, 16, 16, 16, 128, 1, 0, 0, 0, 1, 1 } }, + } }, + { "SM5.0", { + { Name{"GeForce GTX 750 "}, Params{ 32, 8, 16, 32, 64, 16, 8, 128, 0, 0, 0, 1, 2, 1 } }, + { Name{"GeForce GTX 750 Ti "}, Params{ 32, 2, 8, 8, 32, 16, 16, 32, 0, 0, 0, 0, 4, 2 } }, + { kDeviceNameDefault , Params{ 32, 2, 16, 32, 32, 16, 16, 128, 0, 0, 0, 0, 2, 1 } }, + } }, + { "SM5.2", { + { Name{"GeForce GTX 980 "}, Params{ 32, 8, 16, 8, 64, 32, 32, 128, 0, 0, 1, 0, 2, 4 } }, + { Name{"GeForce GTX TITAN X "}, Params{ 16, 8, 16, 16, 16, 16, 16, 16, 0, 0, 0, 0, 1, 1 } }, + { kDeviceNameDefault , Params{ 16, 8, 16, 16, 16, 16, 16, 128, 0, 0, 0, 0, 1, 1 } }, + } }, + { "SM6.1", { + { Name{"GeForce GTX 1070 "}, Params{ 16, 2, 8, 16, 32, 8, 8, 64, 0, 0, 1, 1, 2, 8 } }, + { Name{"GeForce GTX 1080 "}, Params{ 32, 2, 16, 16, 32, 16, 16, 64, 0, 0, 0, 0, 2, 4 } }, + { Name{"TITAN X (Pascal) "}, Params{ 32, 2, 32, 32, 32, 16, 16, 32, 0, 0, 0, 0, 1, 2 } }, + { kDeviceNameDefault , Params{ 32, 2, 16, 16, 32, 16, 16, 64, 0, 0, 0, 0, 2, 4 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 32, 2, 16, 16, 32, 16, 16, 64, 0, 0, 0, 0, 2, 4 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 32, 2, 8, 8, 32, 8, 8, 64, 0, 0, 0, 0, 4, 4 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xgemm/xgemm_6464.hpp b/src/database/kernels/xgemm/xgemm_6464.hpp new file mode 100644 index 00000000..f9236718 --- /dev/null +++ b/src/database/kernels/xgemm/xgemm_6464.hpp @@ -0,0 +1,130 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xgemm6464' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XgemmComplexDouble = { + "Xgemm", Precision::kComplexDouble, {"KWG", "KWI", "MDIMA", "MDIMC", "MWG", "NDIMB", "NDIMC", "NWG", "SA", "SB", "STRM", "STRN", "VWM", "VWN"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { + { Name{"AMD Radeon RX 480 "}, Params{ 32, 2, 16, 16, 16, 16, 16, 16, 1, 1, 0, 0, 1, 1 } }, + { kDeviceNameDefault , Params{ 32, 2, 16, 16, 16, 16, 16, 16, 1, 1, 0, 0, 1, 1 } }, + } }, + { "Fiji", { + { Name{"AMD Radeon R9 Fury X "}, Params{ 32, 2, 16, 16, 16, 16, 16, 16, 1, 1, 0, 0, 1, 1 } }, + { Name{"AMD Radeon R9 M370X Compute Engine "}, Params{ 32, 8, 8, 16, 32, 16, 16, 32, 0, 0, 1, 1, 2, 2 } }, + { kDeviceNameDefault , Params{ 32, 2, 16, 16, 32, 8, 8, 32, 0, 0, 0, 0, 2, 4 } }, + } }, + { "Hawaii", { + { Name{"AMD Radeon R9 290X "}, Params{ 16, 2, 16, 16, 16, 16, 16, 32, 1, 0, 0, 0, 1, 2 } }, + { kDeviceNameDefault , Params{ 16, 2, 16, 16, 16, 16, 16, 32, 1, 0, 0, 0, 1, 2 } }, + } }, + { "Oland", { + { Name{"Oland "}, Params{ 16, 2, 16, 8, 16, 16, 32, 128, 0, 0, 0, 0, 1, 4 } }, + { kDeviceNameDefault , Params{ 16, 2, 16, 8, 16, 16, 32, 128, 0, 0, 0, 0, 1, 4 } }, + } }, + { "Pitcairn", { + { Name{"AMD Radeon R9 270X "}, Params{ 32, 2, 16, 8, 32, 8, 32, 32, 0, 1, 1, 0, 1, 1 } }, + { kDeviceNameDefault , Params{ 32, 2, 16, 8, 32, 8, 32, 32, 0, 1, 1, 0, 1, 1 } }, + } }, + { "Tahiti", { + { Name{"AMD Radeon HD 7970 "}, Params{ 16, 2, 16, 8, 16, 8, 8, 16, 0, 0, 1, 0, 1, 1 } }, + { kDeviceNameDefault , Params{ 16, 2, 16, 8, 16, 8, 8, 16, 0, 0, 1, 0, 1, 1 } }, + } }, + { "Tonga", { + { Name{"AMD Radeon R9 380 "}, Params{ 16, 2, 32, 16, 32, 16, 16, 16, 1, 1, 1, 1, 1, 1 } }, + { kDeviceNameDefault , Params{ 16, 2, 32, 16, 32, 16, 16, 16, 1, 1, 1, 1, 1, 1 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 32, 2, 16, 16, 16, 16, 16, 16, 1, 1, 0, 0, 1, 1 } }, + } }, + } + }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "default", { + { Name{"Mali-T628 "}, Params{ 16, 2, 8, 8, 64, 32, 8, 64, 0, 0, 1, 0, 8, 1 } }, + { kDeviceNameDefault , Params{ 16, 2, 8, 8, 64, 32, 8, 64, 0, 0, 1, 0, 8, 1 } }, + } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "default", { + { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 16, 2, 32, 8, 64, 16, 8, 128, 0, 1, 0, 1, 2, 1 } }, + { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 32, 2, 16, 32, 128, 16, 16, 64, 0, 1, 0, 0, 2, 4 } }, + { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 32, 2, 16, 32, 128, 16, 8, 32, 0, 1, 0, 0, 4, 1 } }, + { Name{"Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz "}, Params{ 32, 2, 8, 8, 128, 8, 16, 128, 0, 0, 0, 1, 1, 8 } }, + { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 32, 2, 8, 8, 128, 32, 8, 128, 0, 0, 0, 0, 1, 4 } }, + { Name{"Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz "}, Params{ 32, 8, 8, 32, 32, 8, 8, 32, 0, 1, 0, 0, 1, 2 } }, + { kDeviceNameDefault , Params{ 32, 2, 8, 8, 16, 8, 8, 32, 1, 1, 0, 0, 1, 2 } }, + } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "default", { + { Name{"Intel(R) Many Integrated Core Acceleration Card "}, Params{ 32, 2, 16, 16, 16, 16, 8, 32, 0, 0, 1, 0, 1, 1 } }, + { kDeviceNameDefault , Params{ 32, 2, 16, 16, 16, 16, 8, 32, 0, 0, 1, 0, 1, 1 } }, + } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "SM2.0", { + { Name{"GeForce GTX 480 "}, Params{ 16, 2, 32, 32, 32, 32, 8, 32, 0, 0, 1, 0, 1, 1 } }, + { kDeviceNameDefault , Params{ 16, 2, 32, 32, 32, 32, 8, 32, 0, 0, 1, 0, 1, 1 } }, + } }, + { "SM3.0", { + { Name{"GRID K520 "}, Params{ 32, 8, 16, 16, 16, 8, 16, 64, 1, 0, 1, 1, 1, 1 } }, + { Name{"GeForce GTX 670 "}, Params{ 32, 8, 16, 8, 16, 16, 32, 64, 1, 0, 0, 1, 1, 2 } }, + { Name{"GeForce GTX 680 "}, Params{ 16, 8, 16, 8, 64, 16, 32, 32, 0, 1, 1, 0, 1, 1 } }, + { kDeviceNameDefault , Params{ 16, 8, 16, 16, 16, 16, 16, 32, 0, 0, 0, 0, 1, 1 } }, + } }, + { "SM3.5", { + { Name{"GeForce GTX TITAN Black "}, Params{ 16, 2, 16, 16, 32, 16, 8, 32, 0, 1, 1, 1, 1, 1 } }, + { Name{"Tesla K20m "}, Params{ 32, 2, 32, 8, 32, 16, 16, 64, 0, 0, 1, 0, 1, 1 } }, + { Name{"Tesla K40m "}, Params{ 16, 8, 8, 8, 32, 32, 16, 32, 0, 0, 1, 0, 1, 1 } }, + { kDeviceNameDefault , Params{ 32, 2, 8, 8, 32, 16, 16, 64, 0, 0, 0, 0, 4, 1 } }, + } }, + { "SM5.0", { + { Name{"GeForce GTX 750 "}, Params{ 32, 2, 8, 32, 32, 8, 8, 64, 0, 0, 1, 0, 1, 4 } }, + { Name{"GeForce GTX 750 Ti "}, Params{ 32, 2, 8, 8, 16, 8, 8, 32, 0, 0, 0, 0, 1, 1 } }, + { kDeviceNameDefault , Params{ 32, 2, 8, 32, 16, 8, 8, 32, 0, 0, 0, 0, 1, 1 } }, + } }, + { "SM5.2", { + { Name{"GeForce GTX 980 "}, Params{ 16, 2, 16, 8, 32, 8, 16, 128, 0, 0, 1, 1, 2, 2 } }, + { Name{"GeForce GTX TITAN X "}, Params{ 32, 8, 16, 16, 128, 16, 16, 32, 0, 0, 1, 0, 1, 1 } }, + { kDeviceNameDefault , Params{ 16, 2, 16, 16, 128, 16, 16, 128, 0, 0, 1, 0, 1, 1 } }, + } }, + { "SM6.1", { + { Name{"GeForce GTX 1070 "}, Params{ 32, 8, 32, 16, 32, 8, 8, 32, 0, 0, 0, 1, 1, 4 } }, + { Name{"GeForce GTX 1080 "}, Params{ 32, 2, 16, 16, 16, 8, 8, 16, 0, 0, 0, 0, 1, 2 } }, + { Name{"TITAN X (Pascal) "}, Params{ 32, 2, 16, 16, 16, 16, 16, 16, 0, 0, 0, 0, 1, 1 } }, + { kDeviceNameDefault , Params{ 32, 2, 32, 32, 32, 32, 32, 64, 0, 0, 0, 0, 1, 2 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 32, 2, 16, 16, 32, 16, 16, 32, 0, 0, 0, 0, 1, 1 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 32, 2, 32, 32, 32, 8, 8, 32, 1, 1, 0, 0, 1, 1 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xgemm_direct.hpp b/src/database/kernels/xgemm_direct.hpp deleted file mode 100644 index acace63f..00000000 --- a/src/database/kernels/xgemm_direct.hpp +++ /dev/null @@ -1,218 +0,0 @@ - -// ================================================================================================= -// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This -// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- -// width of 100 characters per line. -// -// Author(s): -// Database generator <database.py> -// -// This file populates the database with best-found tuning parameters for the 'Xgemm_Direct' kernels. -// -// ================================================================================================= - -namespace clblast { -namespace database { -// ================================================================================================= - -const Database::DatabaseEntry XgemmDirectHalf = { - "XgemmDirect", Precision::kHalf, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "Ellesmere", { 8, 32, 8, 8, 32, 1, 1, 1, 1, 32 } }, - { "default", { 8, 32, 8, 8, 32, 1, 1, 1, 1, 32 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics Skylake ULT GT2", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 8 } }, - { "default", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 8 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry XgemmDirectSingle = { - "XgemmDirect", Precision::kSingle, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 2, 8, 8, 8, 8, 1, 1, 2, 2, 32 } }, - { "ATI Radeon HD 6750M", { 8, 8, 16, 8, 8, 1, 0, 2, 2, 32 } }, - { "Ellesmere", { 2, 8, 8, 32, 32, 1, 1, 2, 1, 32 } }, - { "Fiji", { 2, 16, 16, 8, 8, 1, 1, 1, 1, 16 } }, - { "Tonga", { 16, 16, 16, 32, 8, 0, 1, 1, 1, 32 } }, - { "Turks", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 16 } }, - { "default", { 2, 16, 16, 8, 8, 1, 1, 1, 1, 16 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 2, 8, 8, 8, 8, 0, 0, 1, 8, 64 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 16, 16, 8, 8, 8, 0, 0, 2, 4, 32 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 2, 8, 8, 8, 8, 0, 0, 2, 2, 64 } }, - { "default", { 2, 8, 8, 8, 8, 1, 1, 4, 2, 32 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics Skylake ULT GT2", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 8 } }, - { "Iris Pro", { 2, 16, 16, 8, 8, 1, 1, 2, 4, 32 } }, - { "default", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 8 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GeForce GT 650M", { 16, 16, 16, 8, 16, 1, 0, 2, 2, 32 } }, - { "GeForce GTX 1080", { 16, 16, 8, 16, 8, 1, 1, 1, 1, 32 } }, - { "GeForce GTX 750 Ti", { 2, 8, 8, 8, 8, 1, 1, 4, 2, 32 } }, - { "GeForce GTX TITAN Black", { 2, 8, 8, 16, 16, 1, 1, 4, 2, 32 } }, - { "TITAN X (Pascal)", { 8, 32, 8, 8, 16, 1, 1, 1, 1, 32 } }, - { "default", { 2, 8, 8, 16, 16, 1, 1, 4, 2, 32 } }, - } - }, - { // QUALCOMM GPUs - kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 2, 8, 8, 8, 8, 1, 1, 2, 1, 16 } }, - { "default", { 2, 8, 8, 8, 8, 1, 1, 2, 1, 16 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 2, 8, 8, 8, 8, 1, 1, 1, 2, 16 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry XgemmDirectComplexSingle = { - "XgemmDirect", Precision::kComplexSingle, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } }, - { "ATI Radeon HD 6750M", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 8 } }, - { "Fiji", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } }, - { "Tonga", { 2, 16, 16, 16, 16, 1, 1, 2, 2, 32 } }, - { "Turks", { 2, 8, 8, 8, 8, 1, 1, 2, 2, 16 } }, - { "default", { 2, 16, 16, 16, 16, 1, 1, 2, 2, 32 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 2, 8, 8, 8, 8, 0, 0, 4, 4, 32 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 2, 16, 16, 8, 8, 1, 1, 1, 4, 32 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 2, 8, 8, 16, 8, 1, 1, 2, 1, 32 } }, - { "default", { 2, 8, 8, 8, 8, 1, 1, 4, 4, 32 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics Skylake ULT GT2", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } }, - { "Iris Pro", { 2, 16, 16, 8, 8, 1, 1, 2, 2, 32 } }, - { "default", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GeForce GTX 1080", { 8, 8, 16, 16, 8, 1, 1, 2, 2, 32 } }, - { "GeForce GTX 750 Ti", { 16, 8, 8, 16, 8, 1, 1, 2, 1, 16 } }, - { "GeForce GTX TITAN Black", { 2, 8, 8, 16, 16, 1, 1, 1, 1, 16 } }, - { "TITAN X (Pascal)", { 2, 16, 16, 8, 8, 1, 1, 1, 1, 16 } }, - { "default", { 2, 8, 8, 8, 8, 1, 1, 1, 2, 16 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 2, 32, 32, 8, 8, 1, 1, 1, 1, 32 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry XgemmDirectDouble = { - "XgemmDirect", Precision::kDouble, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 2, 16, 16, 8, 8, 1, 1, 1, 1, 16 } }, - { "Ellesmere", { 8, 16, 16, 8, 16, 1, 1, 2, 1, 32 } }, - { "Fiji", { 16, 8, 8, 8, 16, 1, 1, 1, 1, 16 } }, - { "Tonga", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 32 } }, - { "default", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 2, 8, 8, 8, 8, 1, 1, 4, 4, 32 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 8, 8, 8, 8, 8, 0, 0, 1, 4, 32 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 2, 8, 8, 8, 8, 1, 1, 4, 4, 32 } }, - { "default", { 2, 8, 8, 8, 8, 1, 1, 4, 2, 32 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GeForce GTX 1080", { 2, 16, 16, 8, 8, 1, 1, 1, 2, 16 } }, - { "GeForce GTX 750 Ti", { 2, 8, 8, 8, 8, 1, 1, 2, 4, 32 } }, - { "GeForce GTX TITAN Black", { 8, 16, 16, 16, 8, 1, 0, 1, 1, 16 } }, - { "TITAN X (Pascal)", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 16 } }, - { "default", { 2, 8, 8, 8, 8, 1, 1, 1, 2, 16 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 2, 8, 8, 8, 8, 1, 1, 2, 2, 16 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry XgemmDirectComplexDouble = { - "XgemmDirect", Precision::kComplexDouble, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } }, - { "Ellesmere", { 16, 32, 32, 16, 8, 0, 0, 1, 1, 32 } }, - { "Fiji", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } }, - { "Tonga", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } }, - { "default", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 2, 8, 8, 32, 8, 0, 0, 1, 1, 32 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 8, 16, 16, 8, 8, 0, 0, 2, 1, 32 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 8, 16, 8, 8, 8, 0, 0, 2, 2, 32 } }, - { "default", { 2, 8, 8, 8, 8, 1, 1, 2, 2, 16 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GeForce GTX 1080", { 2, 16, 16, 8, 8, 1, 1, 1, 1, 16 } }, - { "GeForce GTX 750 Ti", { 2, 32, 32, 8, 8, 1, 1, 1, 1, 32 } }, - { "GeForce GTX TITAN Black", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 8 } }, - { "TITAN X (Pascal)", { 2, 16, 16, 8, 8, 1, 1, 1, 2, 16 } }, - { "default", { 2, 16, 16, 8, 8, 1, 1, 1, 1, 16 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 2, 8, 8, 8, 8, 1, 1, 1, 2, 16 } }, - } - }, - } -}; - -// ================================================================================================= -} // namespace database -} // namespace clblast diff --git a/src/database/kernels/xgemm_direct/xgemm_direct.hpp b/src/database/kernels/xgemm_direct/xgemm_direct.hpp new file mode 100644 index 00000000..9a26e7ce --- /dev/null +++ b/src/database/kernels/xgemm_direct/xgemm_direct.hpp @@ -0,0 +1,14 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xgemm_Direct' kernels. +// +// ================================================================================================= + +#include "database/kernels/xgemm_direct/xgemm_direct_16.hpp" +#include "database/kernels/xgemm_direct/xgemm_direct_32.hpp" +#include "database/kernels/xgemm_direct/xgemm_direct_3232.hpp" +#include "database/kernels/xgemm_direct/xgemm_direct_64.hpp" +#include "database/kernels/xgemm_direct/xgemm_direct_6464.hpp" diff --git a/src/database/kernels/xgemm_direct/xgemm_direct_16.hpp b/src/database/kernels/xgemm_direct/xgemm_direct_16.hpp new file mode 100644 index 00000000..49051b7f --- /dev/null +++ b/src/database/kernels/xgemm_direct/xgemm_direct_16.hpp @@ -0,0 +1,45 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xgemm_Direct16' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XgemmDirectHalf = { + "XgemmDirect", Precision::kHalf, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { + { Name{"AMD Radeon RX 480 "}, Params{ 8, 32, 8, 8, 32, 1, 1, 1, 1, 32, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 32, 8, 8, 32, 1, 1, 1, 1, 32, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 8, 32, 8, 8, 32, 1, 1, 1, 1, 32, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 2, 8, 8, 8, 8, 1, 1, 1, 1, 8, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 1, 1, 8, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 2, 16, 16, 16, 16, 1, 1, 1, 1, 16, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xgemm_direct/xgemm_direct_32.hpp b/src/database/kernels/xgemm_direct/xgemm_direct_32.hpp new file mode 100644 index 00000000..8fe7b97a --- /dev/null +++ b/src/database/kernels/xgemm_direct/xgemm_direct_32.hpp @@ -0,0 +1,105 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xgemm_Direct32' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XgemmDirectSingle = { + "XgemmDirect", Precision::kSingle, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { + { Name{"AMD Radeon RX 480 "}, Params{ 2, 8, 8, 32, 32, 1, 1, 2, 1, 32, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 8, 8, 32, 32, 1, 1, 2, 1, 32, 0, 0, 0, 0 } }, + } }, + { "Fiji", { + { Name{"AMD Radeon R9 Fury X "}, Params{ 2, 16, 16, 8, 8, 1, 1, 1, 1, 16, 0, 0, 0, 0 } }, + { Name{"AMD Radeon R9 M370X Compute Engine "}, Params{ 2, 8, 8, 8, 8, 1, 1, 2, 2, 32, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 16, 16, 8, 8, 1, 1, 1, 1, 16, 0, 0, 0, 0 } }, + } }, + { "Tonga", { + { Name{"AMD Radeon R9 380 "}, Params{ 16, 16, 16, 32, 8, 0, 1, 1, 1, 32, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 16, 16, 32, 8, 0, 1, 1, 1, 32, 0, 0, 0, 0 } }, + } }, + { "Turks", { + { Name{"AMD Radeon HD 6770M "}, Params{ 2, 8, 8, 8, 8, 1, 1, 1, 1, 16, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 1, 1, 16, 0, 0, 0, 0 } }, + } }, + { "Vancouver", { + { Name{"ATI Radeon HD 6750M "}, Params{ 8, 8, 16, 8, 8, 1, 0, 2, 2, 32, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 8, 16, 8, 8, 1, 0, 2, 2, 32, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 2, 16, 16, 8, 8, 1, 1, 1, 1, 16, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "default", { + { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 2, 8, 8, 8, 8, 0, 0, 1, 8, 64, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 16, 16, 8, 8, 8, 0, 0, 2, 4, 32, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 2, 8, 8, 8, 8, 0, 0, 2, 2, 64, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 4, 2, 32, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 2, 8, 8, 8, 8, 1, 1, 1, 1, 8, 0, 0, 0, 0 } }, + { Name{"Iris Pro "}, Params{ 2, 16, 16, 8, 8, 1, 1, 2, 4, 32, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 1, 1, 8, 0, 0, 0, 0 } }, + } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "SM3.0", { + { Name{"GeForce GT 650M "}, Params{ 16, 16, 16, 8, 16, 1, 0, 2, 2, 32, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 16, 16, 8, 16, 1, 0, 2, 2, 32, 0, 0, 0, 0 } }, + } }, + { "SM3.5", { + { Name{"GeForce GTX TITAN Black "}, Params{ 2, 8, 8, 16, 16, 1, 1, 4, 2, 32, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 8, 8, 16, 16, 1, 1, 4, 2, 32, 0, 0, 0, 0 } }, + } }, + { "SM5.0", { + { Name{"GeForce GTX 750 Ti "}, Params{ 2, 8, 8, 8, 8, 1, 1, 4, 2, 32, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 4, 2, 32, 0, 0, 0, 0 } }, + } }, + { "SM6.1", { + { Name{"GeForce GTX 1080 "}, Params{ 16, 16, 8, 16, 8, 1, 1, 1, 1, 32, 0, 0, 0, 0 } }, + { Name{"TITAN X (Pascal) "}, Params{ 8, 32, 8, 8, 16, 1, 1, 1, 1, 32, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 4, 2, 32, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 2, 8, 8, 16, 16, 1, 1, 4, 2, 32, 0, 0, 0, 0 } }, + } }, + } + }, + { // QUALCOMM GPUs + kDeviceTypeGPU, "QUALCOMM", { + { "default", { + { Name{"QUALCOMM Adreno(TM) "}, Params{ 2, 8, 8, 8, 8, 1, 1, 2, 1, 16, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 2, 1, 16, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 1, 2, 16, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xgemm_direct/xgemm_direct_3232.hpp b/src/database/kernels/xgemm_direct/xgemm_direct_3232.hpp new file mode 100644 index 00000000..592f95c2 --- /dev/null +++ b/src/database/kernels/xgemm_direct/xgemm_direct_3232.hpp @@ -0,0 +1,89 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xgemm_Direct3232' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XgemmDirectComplexSingle = { + "XgemmDirect", Precision::kComplexSingle, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Fiji", { + { Name{"AMD Radeon R9 Fury X "}, Params{ 2, 16, 16, 16, 16, 1, 1, 1, 1, 16, 0, 0, 0, 0 } }, + { Name{"AMD Radeon R9 M370X Compute Engine "}, Params{ 2, 16, 16, 16, 16, 1, 1, 1, 1, 16, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 16, 16, 16, 16, 1, 1, 1, 1, 16, 0, 0, 0, 0 } }, + } }, + { "Tonga", { + { Name{"AMD Radeon R9 380 "}, Params{ 2, 16, 16, 16, 16, 1, 1, 2, 2, 32, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 16, 16, 16, 16, 1, 1, 2, 2, 32, 0, 0, 0, 0 } }, + } }, + { "Turks", { + { Name{"AMD Radeon HD 6770M "}, Params{ 2, 8, 8, 8, 8, 1, 1, 2, 2, 16, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 2, 2, 16, 0, 0, 0, 0 } }, + } }, + { "Vancouver", { + { Name{"ATI Radeon HD 6750M "}, Params{ 2, 8, 8, 8, 8, 1, 1, 1, 1, 8, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 1, 1, 8, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 2, 16, 16, 16, 16, 1, 1, 2, 2, 32, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "default", { + { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 2, 8, 8, 8, 8, 0, 0, 4, 4, 32, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 2, 16, 16, 8, 8, 1, 1, 1, 4, 32, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 2, 8, 8, 16, 8, 1, 1, 2, 1, 32, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 4, 4, 32, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 2, 16, 16, 16, 16, 1, 1, 1, 1, 16, 0, 0, 0, 0 } }, + { Name{"Iris Pro "}, Params{ 2, 16, 16, 8, 8, 1, 1, 2, 2, 32, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 16, 16, 16, 16, 1, 1, 1, 1, 16, 0, 0, 0, 0 } }, + } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "SM3.5", { + { Name{"GeForce GTX TITAN Black "}, Params{ 2, 8, 8, 16, 16, 1, 1, 1, 1, 16, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 8, 8, 16, 16, 1, 1, 1, 1, 16, 0, 0, 0, 0 } }, + } }, + { "SM5.0", { + { Name{"GeForce GTX 750 Ti "}, Params{ 16, 8, 8, 16, 8, 1, 1, 2, 1, 16, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 8, 8, 16, 8, 1, 1, 2, 1, 16, 0, 0, 0, 0 } }, + } }, + { "SM6.1", { + { Name{"GeForce GTX 1080 "}, Params{ 8, 8, 16, 16, 8, 1, 1, 2, 2, 32, 0, 0, 0, 0 } }, + { Name{"TITAN X (Pascal) "}, Params{ 2, 16, 16, 8, 8, 1, 1, 1, 1, 16, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 16, 16, 8, 8, 1, 1, 2, 4, 32, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 1, 2, 16, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 2, 32, 32, 8, 8, 1, 1, 1, 1, 32, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xgemm_direct/xgemm_direct_64.hpp b/src/database/kernels/xgemm_direct/xgemm_direct_64.hpp new file mode 100644 index 00000000..ea2acde6 --- /dev/null +++ b/src/database/kernels/xgemm_direct/xgemm_direct_64.hpp @@ -0,0 +1,76 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xgemm_Direct64' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XgemmDirectDouble = { + "XgemmDirect", Precision::kDouble, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { + { Name{"AMD Radeon RX 480 "}, Params{ 8, 16, 16, 8, 16, 1, 1, 2, 1, 32, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 16, 16, 8, 16, 1, 1, 2, 1, 32, 0, 0, 0, 0 } }, + } }, + { "Fiji", { + { Name{"AMD Radeon R9 Fury X "}, Params{ 16, 8, 8, 8, 16, 1, 1, 1, 1, 16, 0, 0, 0, 0 } }, + { Name{"AMD Radeon R9 M370X Compute Engine "}, Params{ 2, 16, 16, 8, 8, 1, 1, 1, 1, 16, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 16, 16, 8, 8, 1, 1, 1, 1, 16, 0, 0, 0, 0 } }, + } }, + { "Tonga", { + { Name{"AMD Radeon R9 380 "}, Params{ 2, 16, 16, 16, 16, 1, 1, 1, 1, 32, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 16, 16, 16, 16, 1, 1, 1, 1, 32, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 2, 16, 16, 16, 16, 1, 1, 1, 1, 16, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "default", { + { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 2, 8, 8, 8, 8, 1, 1, 4, 4, 32, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 8, 8, 8, 8, 8, 0, 0, 1, 4, 32, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 2, 8, 8, 8, 8, 1, 1, 4, 4, 32, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 4, 2, 32, 0, 0, 0, 0 } }, + } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "SM3.5", { + { Name{"GeForce GTX TITAN Black "}, Params{ 8, 16, 16, 16, 8, 1, 0, 1, 1, 16, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 16, 16, 16, 8, 1, 0, 1, 1, 16, 0, 0, 0, 0 } }, + } }, + { "SM5.0", { + { Name{"GeForce GTX 750 Ti "}, Params{ 2, 8, 8, 8, 8, 1, 1, 2, 4, 32, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 2, 4, 32, 0, 0, 0, 0 } }, + } }, + { "SM6.1", { + { Name{"GeForce GTX 1080 "}, Params{ 2, 16, 16, 8, 8, 1, 1, 1, 2, 16, 0, 0, 0, 0 } }, + { Name{"TITAN X (Pascal) "}, Params{ 2, 8, 8, 8, 8, 1, 1, 1, 1, 16, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 1, 1, 16, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 1, 2, 16, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 2, 2, 16, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xgemm_direct/xgemm_direct_6464.hpp b/src/database/kernels/xgemm_direct/xgemm_direct_6464.hpp new file mode 100644 index 00000000..b8bc380b --- /dev/null +++ b/src/database/kernels/xgemm_direct/xgemm_direct_6464.hpp @@ -0,0 +1,76 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xgemm_Direct6464' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XgemmDirectComplexDouble = { + "XgemmDirect", Precision::kComplexDouble, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { + { Name{"AMD Radeon RX 480 "}, Params{ 16, 32, 32, 16, 8, 0, 0, 1, 1, 32, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 32, 32, 16, 8, 0, 0, 1, 1, 32, 0, 0, 0, 0 } }, + } }, + { "Fiji", { + { Name{"AMD Radeon R9 Fury X "}, Params{ 2, 16, 16, 16, 16, 1, 1, 1, 1, 16, 0, 0, 0, 0 } }, + { Name{"AMD Radeon R9 M370X Compute Engine "}, Params{ 2, 16, 16, 16, 16, 1, 1, 1, 1, 16, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 16, 16, 16, 16, 1, 1, 1, 1, 16, 0, 0, 0, 0 } }, + } }, + { "Tonga", { + { Name{"AMD Radeon R9 380 "}, Params{ 2, 16, 16, 16, 16, 1, 1, 1, 1, 16, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 16, 16, 16, 16, 1, 1, 1, 1, 16, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 2, 16, 16, 16, 16, 1, 1, 1, 1, 16, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "default", { + { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 2, 8, 8, 32, 8, 0, 0, 1, 1, 32, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 8, 16, 16, 8, 8, 0, 0, 2, 1, 32, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 8, 16, 8, 8, 8, 0, 0, 2, 2, 32, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 2, 2, 16, 0, 0, 0, 0 } }, + } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "SM3.5", { + { Name{"GeForce GTX TITAN Black "}, Params{ 2, 8, 8, 8, 8, 1, 1, 1, 1, 8, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 1, 1, 8, 0, 0, 0, 0 } }, + } }, + { "SM5.0", { + { Name{"GeForce GTX 750 Ti "}, Params{ 2, 32, 32, 8, 8, 1, 1, 1, 1, 32, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 32, 32, 8, 8, 1, 1, 1, 1, 32, 0, 0, 0, 0 } }, + } }, + { "SM6.1", { + { Name{"GeForce GTX 1080 "}, Params{ 2, 16, 16, 8, 8, 1, 1, 1, 1, 16, 0, 0, 0, 0 } }, + { Name{"TITAN X (Pascal) "}, Params{ 2, 16, 16, 8, 8, 1, 1, 1, 2, 16, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 16, 16, 8, 8, 1, 1, 1, 1, 16, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 2, 16, 16, 8, 8, 1, 1, 1, 1, 16, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 1, 2, 16, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xgemv.hpp b/src/database/kernels/xgemv.hpp deleted file mode 100644 index c537294a..00000000 --- a/src/database/kernels/xgemv.hpp +++ /dev/null @@ -1,306 +0,0 @@ - -// ================================================================================================= -// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This -// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- -// width of 100 characters per line. -// -// Author(s): -// Database generator <database.py> -// -// This file populates the database with best-found tuning parameters for the 'Xgemv' kernels. -// -// ================================================================================================= - -namespace clblast { -namespace database { -// ================================================================================================= - -const Database::DatabaseEntry XgemvHalf = { - "Xgemv", Precision::kHalf, {"WGS1", "WPT1"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "Ellesmere", { 256, 1 } }, - { "default", { 256, 1 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 64, 1 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 256, 1 } }, - { "default", { 64, 1 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 64, 1 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry XgemvSingle = { - "Xgemv", Precision::kSingle, {"WGS1", "WPT1"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 128, 1 } }, - { "ATI Radeon HD 6750M", { 32, 1 } }, - { "Ellesmere", { 256, 1 } }, - { "Fiji", { 128, 1 } }, - { "Hawaii", { 128, 1 } }, - { "Oland", { 128, 1 } }, - { "Pitcairn", { 256, 1 } }, - { "Tahiti", { 256, 1 } }, - { "Tonga", { 128, 2 } }, - { "Turks", { 32, 1 } }, - { "default", { 128, 1 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 128, 4 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 64, 1 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 4 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 64, 4 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 64, 4 } }, - { "default", { 64, 4 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 530", { 256, 1 } }, - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 64, 1 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 64, 1 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 256, 1 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 32, 1 } }, - { "Iris", { 64, 2 } }, - { "Iris Pro", { 128, 1 } }, - { "default", { 128, 1 } }, - } - }, - { // Intel accelerators - kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 64, 1 } }, - { "default", { 64, 1 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 256, 1 } }, - { "GeForce GT 650M", { 256, 1 } }, - { "GeForce GTX 1070", { 128, 1 } }, - { "GeForce GTX 1080", { 32, 1 } }, - { "GeForce GTX 480", { 64, 1 } }, - { "GeForce GTX 670", { 64, 1 } }, - { "GeForce GTX 680", { 256, 1 } }, - { "GeForce GTX 750", { 256, 1 } }, - { "GeForce GTX 750 Ti", { 32, 1 } }, - { "GeForce GTX 980", { 128, 1 } }, - { "GeForce GTX TITAN", { 256, 1 } }, - { "GeForce GTX TITAN Black", { 256, 1 } }, - { "GeForce GTX TITAN X", { 256, 1 } }, - { "TITAN X (Pascal)", { 32, 1 } }, - { "Tesla K20m", { 128, 1 } }, - { "Tesla K40m", { 256, 1 } }, - { "default", { 256, 1 } }, - } - }, - { // QUALCOMM GPUs - kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 64, 1 } }, - { "default", { 64, 1 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 128, 1 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry XgemvComplexSingle = { - "Xgemv", Precision::kComplexSingle, {"WGS1", "WPT1"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 64, 1 } }, - { "ATI Radeon HD 6750M", { 64, 1 } }, - { "Ellesmere", { 32, 1 } }, - { "Fiji", { 32, 1 } }, - { "Hawaii", { 64, 1 } }, - { "Oland", { 64, 1 } }, - { "Pitcairn", { 64, 1 } }, - { "Tahiti", { 64, 1 } }, - { "Tonga", { 32, 1 } }, - { "Turks", { 64, 1 } }, - { "default", { 64, 1 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 4 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 128, 1 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 128, 2 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 64, 4 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 64, 4 } }, - { "default", { 64, 2 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 530", { 64, 1 } }, - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 64, 1 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 128, 1 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 256, 1 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 64, 1 } }, - { "Iris", { 256, 1 } }, - { "Iris Pro", { 64, 1 } }, - { "default", { 64, 1 } }, - } - }, - { // Intel accelerators - kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 64, 1 } }, - { "default", { 64, 1 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 256, 1 } }, - { "GeForce GTX 1070", { 64, 1 } }, - { "GeForce GTX 1080", { 32, 1 } }, - { "GeForce GTX 480", { 64, 1 } }, - { "GeForce GTX 670", { 64, 1 } }, - { "GeForce GTX 680", { 64, 1 } }, - { "GeForce GTX 750", { 128, 1 } }, - { "GeForce GTX 750 Ti", { 32, 1 } }, - { "GeForce GTX TITAN", { 256, 1 } }, - { "GeForce GTX TITAN Black", { 32, 1 } }, - { "TITAN X (Pascal)", { 32, 1 } }, - { "default", { 64, 1 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 64, 1 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry XgemvDouble = { - "Xgemv", Precision::kDouble, {"WGS1", "WPT1"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 64, 1 } }, - { "Ellesmere", { 32, 1 } }, - { "Fiji", { 32, 1 } }, - { "Hawaii", { 128, 1 } }, - { "Oland", { 256, 1 } }, - { "Pitcairn", { 256, 1 } }, - { "Tahiti", { 256, 1 } }, - { "Tonga", { 32, 1 } }, - { "default", { 256, 1 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 64, 4 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 64, 2 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 128, 1 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 64, 4 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 64, 4 } }, - { "default", { 64, 4 } }, - } - }, - { // Intel accelerators - kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 64, 1 } }, - { "default", { 64, 1 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 128, 1 } }, - { "GeForce GTX 1070", { 64, 1 } }, - { "GeForce GTX 1080", { 32, 1 } }, - { "GeForce GTX 480", { 256, 1 } }, - { "GeForce GTX 670", { 128, 1 } }, - { "GeForce GTX 680", { 128, 1 } }, - { "GeForce GTX 750", { 64, 1 } }, - { "GeForce GTX 750 Ti", { 32, 1 } }, - { "GeForce GTX 980", { 64, 1 } }, - { "GeForce GTX TITAN", { 256, 1 } }, - { "GeForce GTX TITAN Black", { 32, 1 } }, - { "GeForce GTX TITAN X", { 64, 1 } }, - { "TITAN X (Pascal)", { 32, 1 } }, - { "Tesla K20m", { 256, 1 } }, - { "Tesla K40m", { 256, 1 } }, - { "default", { 128, 1 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 128, 1 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry XgemvComplexDouble = { - "Xgemv", Precision::kComplexDouble, {"WGS1", "WPT1"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 64, 1 } }, - { "Ellesmere", { 32, 1 } }, - { "Fiji", { 64, 1 } }, - { "Hawaii", { 64, 1 } }, - { "Oland", { 256, 1 } }, - { "Pitcairn", { 256, 1 } }, - { "Tahiti", { 256, 1 } }, - { "Tonga", { 64, 1 } }, - { "default", { 64, 1 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 64, 4 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 64, 1 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 128, 2 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 64, 4 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 4 } }, - { "default", { 64, 4 } }, - } - }, - { // Intel accelerators - kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 64, 1 } }, - { "default", { 64, 1 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 128, 1 } }, - { "GeForce GTX 480", { 64, 1 } }, - { "GeForce GTX 670", { 128, 1 } }, - { "default", { 128, 1 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 64, 1 } }, - } - }, - } -}; - -// ================================================================================================= -} // namespace database -} // namespace clblast diff --git a/src/database/kernels/xgemv/xgemv.hpp b/src/database/kernels/xgemv/xgemv.hpp new file mode 100644 index 00000000..081c995f --- /dev/null +++ b/src/database/kernels/xgemv/xgemv.hpp @@ -0,0 +1,14 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xgemv' kernels. +// +// ================================================================================================= + +#include "database/kernels/xgemv/xgemv_16.hpp" +#include "database/kernels/xgemv/xgemv_32.hpp" +#include "database/kernels/xgemv/xgemv_3232.hpp" +#include "database/kernels/xgemv/xgemv_64.hpp" +#include "database/kernels/xgemv/xgemv_6464.hpp" diff --git a/src/database/kernels/xgemv/xgemv_16.hpp b/src/database/kernels/xgemv/xgemv_16.hpp new file mode 100644 index 00000000..0db7d960 --- /dev/null +++ b/src/database/kernels/xgemv/xgemv_16.hpp @@ -0,0 +1,46 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xgemv16' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XgemvHalf = { + "Xgemv", Precision::kHalf, {"WGS1", "WPT1"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { + { Name{"AMD Radeon RX 480 "}, Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xgemv/xgemv_32.hpp b/src/database/kernels/xgemv/xgemv_32.hpp new file mode 100644 index 00000000..8effec20 --- /dev/null +++ b/src/database/kernels/xgemv/xgemv_32.hpp @@ -0,0 +1,153 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xgemv32' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XgemvSingle = { + "Xgemv", Precision::kSingle, {"WGS1", "WPT1"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { + { Name{"AMD Radeon RX 480 "}, Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Fiji", { + { Name{"AMD Radeon R9 Fury X "}, Params{ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"AMD Radeon R9 M370X Compute Engine "}, Params{ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Hawaii", { + { Name{"AMD Radeon R9 290X "}, Params{ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Oland", { + { Name{"Oland "}, Params{ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Pitcairn", { + { Name{"AMD Radeon R9 270X "}, Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tahiti", { + { Name{"AMD Radeon HD 7970 "}, Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tonga", { + { Name{"AMD Radeon R9 380 "}, Params{ 128, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Turks", { + { Name{"AMD Radeon HD 6770M "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Vancouver", { + { Name{"ATI Radeon HD 6750M "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "default", { + { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 128, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 32, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz "}, Params{ 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics 530 "}, Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile "}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics IvyBridge M GT2 "}, Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Iris "}, Params{ 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Iris Pro "}, Params{ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "default", { + { Name{"Intel(R) Many Integrated Core Acceleration Card "}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "SM2.0", { + { Name{"GeForce GTX 480 "}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.0", { + { Name{"GRID K520 "}, Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GT 650M "}, Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 670 "}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 680 "}, Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.5", { + { Name{"GeForce GTX TITAN "}, Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX TITAN Black "}, Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Tesla K20m "}, Params{ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Tesla K40m "}, Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM5.0", { + { Name{"GeForce GTX 750 "}, Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 750 Ti "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM5.2", { + { Name{"GeForce GTX 980 "}, Params{ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX TITAN X "}, Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM6.1", { + { Name{"GeForce GTX 1070 "}, Params{ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1080 "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"TITAN X (Pascal) "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // QUALCOMM GPUs + kDeviceTypeGPU, "QUALCOMM", { + { "default", { + { Name{"QUALCOMM Adreno(TM) "}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xgemv/xgemv_3232.hpp b/src/database/kernels/xgemv/xgemv_3232.hpp new file mode 100644 index 00000000..747cc1eb --- /dev/null +++ b/src/database/kernels/xgemv/xgemv_3232.hpp @@ -0,0 +1,137 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xgemv3232' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XgemvComplexSingle = { + "Xgemv", Precision::kComplexSingle, {"WGS1", "WPT1"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { + { Name{"AMD Radeon RX 480 "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Fiji", { + { Name{"AMD Radeon R9 Fury X "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"AMD Radeon R9 M370X Compute Engine "}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Hawaii", { + { Name{"AMD Radeon R9 290X "}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Oland", { + { Name{"Oland "}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Pitcairn", { + { Name{"AMD Radeon R9 270X "}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tahiti", { + { Name{"AMD Radeon HD 7970 "}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tonga", { + { Name{"AMD Radeon R9 380 "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Turks", { + { Name{"AMD Radeon HD 6770M "}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Vancouver", { + { Name{"ATI Radeon HD 6750M "}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "default", { + { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 32, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 128, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz "}, Params{ 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics 530 "}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile "}, Params{ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics IvyBridge M GT2 "}, Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Iris "}, Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Iris Pro "}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "default", { + { Name{"Intel(R) Many Integrated Core Acceleration Card "}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "SM2.0", { + { Name{"GeForce GTX 480 "}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.0", { + { Name{"GRID K520 "}, Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 670 "}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 680 "}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.5", { + { Name{"GeForce GTX TITAN "}, Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX TITAN Black "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM5.0", { + { Name{"GeForce GTX 750 "}, Params{ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 750 Ti "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM6.1", { + { Name{"GeForce GTX 1070 "}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1080 "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"TITAN X (Pascal) "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xgemv/xgemv_64.hpp b/src/database/kernels/xgemv/xgemv_64.hpp new file mode 100644 index 00000000..e7b98b64 --- /dev/null +++ b/src/database/kernels/xgemv/xgemv_64.hpp @@ -0,0 +1,122 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xgemv64' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XgemvDouble = { + "Xgemv", Precision::kDouble, {"WGS1", "WPT1"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { + { Name{"AMD Radeon RX 480 "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Fiji", { + { Name{"AMD Radeon R9 Fury X "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"AMD Radeon R9 M370X Compute Engine "}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Hawaii", { + { Name{"AMD Radeon R9 290X "}, Params{ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Oland", { + { Name{"Oland "}, Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Pitcairn", { + { Name{"AMD Radeon R9 270X "}, Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tahiti", { + { Name{"AMD Radeon HD 7970 "}, Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tonga", { + { Name{"AMD Radeon R9 380 "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "default", { + { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz "}, Params{ 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "default", { + { Name{"Intel(R) Many Integrated Core Acceleration Card "}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "SM2.0", { + { Name{"GeForce GTX 480 "}, Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.0", { + { Name{"GRID K520 "}, Params{ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 670 "}, Params{ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 680 "}, Params{ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.5", { + { Name{"GeForce GTX TITAN "}, Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX TITAN Black "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Tesla K20m "}, Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Tesla K40m "}, Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM5.0", { + { Name{"GeForce GTX 750 "}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 750 Ti "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM5.2", { + { Name{"GeForce GTX 980 "}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX TITAN X "}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM6.1", { + { Name{"GeForce GTX 1070 "}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1080 "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"TITAN X (Pascal) "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xgemv/xgemv_6464.hpp b/src/database/kernels/xgemv/xgemv_6464.hpp new file mode 100644 index 00000000..dda5c34b --- /dev/null +++ b/src/database/kernels/xgemv/xgemv_6464.hpp @@ -0,0 +1,98 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xgemv6464' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XgemvComplexDouble = { + "Xgemv", Precision::kComplexDouble, {"WGS1", "WPT1"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { + { Name{"AMD Radeon RX 480 "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Fiji", { + { Name{"AMD Radeon R9 Fury X "}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"AMD Radeon R9 M370X Compute Engine "}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Hawaii", { + { Name{"AMD Radeon R9 290X "}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Oland", { + { Name{"Oland "}, Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Pitcairn", { + { Name{"AMD Radeon R9 270X "}, Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tahiti", { + { Name{"AMD Radeon HD 7970 "}, Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tonga", { + { Name{"AMD Radeon R9 380 "}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "default", { + { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 128, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz "}, Params{ 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 32, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "default", { + { Name{"Intel(R) Many Integrated Core Acceleration Card "}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "SM2.0", { + { Name{"GeForce GTX 480 "}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.0", { + { Name{"GRID K520 "}, Params{ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 670 "}, Params{ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xgemv_fast.hpp b/src/database/kernels/xgemv_fast.hpp deleted file mode 100644 index c3b9103a..00000000 --- a/src/database/kernels/xgemv_fast.hpp +++ /dev/null @@ -1,300 +0,0 @@ - -// ================================================================================================= -// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This -// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- -// width of 100 characters per line. -// -// Author(s): -// Database generator <database.py> -// -// This file populates the database with best-found tuning parameters for the 'Xgemv_Fast' kernels. -// -// ================================================================================================= - -namespace clblast { -namespace database { -// ================================================================================================= - -const Database::DatabaseEntry XgemvFastHalf = { - "XgemvFast", Precision::kHalf, {"VW2", "WGS2", "WPT2"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "Ellesmere", { 1, 32, 1 } }, - { "default", { 1, 32, 1 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 1, 16, 1 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 4, 64, 4 } }, - { "default", { 1, 16, 1 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 1, 16, 1 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry XgemvFastSingle = { - "XgemvFast", Precision::kSingle, {"VW2", "WGS2", "WPT2"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 1, 128, 1 } }, - { "ATI Radeon HD 6750M", { 2, 64, 2 } }, - { "Ellesmere", { 1, 64, 1 } }, - { "Fiji", { 1, 64, 2 } }, - { "Hawaii", { 1, 64, 1 } }, - { "Oland", { 1, 64, 1 } }, - { "Pitcairn", { 1, 64, 1 } }, - { "Tahiti", { 1, 64, 1 } }, - { "Tonga", { 1, 16, 4 } }, - { "Turks", { 1, 256, 1 } }, - { "default", { 1, 64, 1 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 1, 32, 4 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 4, 128, 4 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 4, 32, 4 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 1, 64, 4 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 2, 16, 4 } }, - { "default", { 4, 128, 4 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 530", { 1, 256, 1 } }, - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 2, 32, 2 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 4, 128, 4 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 1, 64, 2 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 2, 32, 2 } }, - { "Iris", { 1, 128, 2 } }, - { "Iris Pro", { 4, 64, 4 } }, - { "default", { 2, 256, 2 } }, - } - }, - { // Intel accelerators - kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 1, 64, 1 } }, - { "default", { 1, 64, 1 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 2, 256, 2 } }, - { "GeForce GT 650M", { 2, 32, 2 } }, - { "GeForce GTX 1070", { 1, 256, 1 } }, - { "GeForce GTX 1080", { 1, 128, 1 } }, - { "GeForce GTX 480", { 1, 128, 1 } }, - { "GeForce GTX 670", { 2, 256, 2 } }, - { "GeForce GTX 680", { 1, 128, 1 } }, - { "GeForce GTX 750", { 1, 256, 1 } }, - { "GeForce GTX 750 Ti", { 2, 32, 2 } }, - { "GeForce GTX 980", { 1, 256, 1 } }, - { "GeForce GTX TITAN", { 1, 256, 1 } }, - { "GeForce GTX TITAN Black", { 1, 256, 1 } }, - { "GeForce GTX TITAN X", { 1, 64, 1 } }, - { "TITAN X (Pascal)", { 1, 64, 1 } }, - { "Tesla K20m", { 1, 256, 1 } }, - { "Tesla K40m", { 1, 256, 1 } }, - { "default", { 1, 256, 1 } }, - } - }, - { // QUALCOMM GPUs - kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 1, 64, 4 } }, - { "default", { 1, 64, 4 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 1, 64, 1 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry XgemvFastComplexSingle = { - "XgemvFast", Precision::kComplexSingle, {"VW2", "WGS2", "WPT2"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 2, 256, 2 } }, - { "ATI Radeon HD 6750M", { 1, 128, 1 } }, - { "Ellesmere", { 1, 64, 1 } }, - { "Fiji", { 1, 16, 1 } }, - { "Hawaii", { 1, 64, 1 } }, - { "Oland", { 1, 64, 1 } }, - { "Pitcairn", { 1, 64, 1 } }, - { "Tahiti", { 1, 128, 1 } }, - { "Tonga", { 2, 32, 2 } }, - { "Turks", { 1, 16, 1 } }, - { "default", { 1, 64, 1 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 2, 64, 4 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1, 128, 2 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 2, 128, 2 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 4, 64, 4 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 16, 4 } }, - { "default", { 1, 64, 2 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 530", { 2, 128, 2 } }, - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 1, 32, 2 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 2, 128, 2 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 1, 32, 4 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 1, 32, 1 } }, - { "Iris", { 1, 64, 1 } }, - { "Iris Pro", { 4, 128, 4 } }, - { "default", { 1, 64, 1 } }, - } - }, - { // Intel accelerators - kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 1, 64, 1 } }, - { "default", { 1, 64, 1 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 1, 256, 1 } }, - { "GeForce GTX 1070", { 1, 64, 1 } }, - { "GeForce GTX 480", { 1, 64, 1 } }, - { "GeForce GTX 670", { 1, 64, 1 } }, - { "GeForce GTX 680", { 1, 64, 1 } }, - { "default", { 1, 64, 1 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 1, 64, 1 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry XgemvFastDouble = { - "XgemvFast", Precision::kDouble, {"VW2", "WGS2", "WPT2"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 1, 256, 1 } }, - { "Ellesmere", { 1, 128, 1 } }, - { "Fiji", { 1, 32, 1 } }, - { "Hawaii", { 1, 64, 1 } }, - { "Oland", { 1, 64, 1 } }, - { "Pitcairn", { 1, 64, 1 } }, - { "Tahiti", { 1, 64, 1 } }, - { "Tonga", { 2, 32, 2 } }, - { "default", { 1, 64, 1 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 1, 64, 4 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 4, 128, 4 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 1, 16, 1 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 1, 64, 4 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 1, 16, 4 } }, - { "default", { 1, 64, 4 } }, - } - }, - { // Intel accelerators - kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 1, 64, 1 } }, - { "default", { 1, 64, 1 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 1, 256, 1 } }, - { "GeForce GTX 1070", { 1, 256, 1 } }, - { "GeForce GTX 1080", { 1, 32, 2 } }, - { "GeForce GTX 480", { 1, 64, 1 } }, - { "GeForce GTX 670", { 1, 128, 1 } }, - { "GeForce GTX 680", { 1, 128, 1 } }, - { "GeForce GTX 750", { 2, 256, 2 } }, - { "GeForce GTX 750 Ti", { 1, 32, 2 } }, - { "GeForce GTX 980", { 1, 64, 1 } }, - { "GeForce GTX TITAN", { 1, 256, 1 } }, - { "GeForce GTX TITAN Black", { 1, 256, 1 } }, - { "GeForce GTX TITAN X", { 1, 128, 1 } }, - { "TITAN X (Pascal)", { 1, 32, 1 } }, - { "Tesla K20m", { 1, 128, 1 } }, - { "Tesla K40m", { 1, 256, 1 } }, - { "default", { 1, 256, 1 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 1, 64, 1 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry XgemvFastComplexDouble = { - "XgemvFast", Precision::kComplexDouble, {"VW2", "WGS2", "WPT2"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 1, 256, 1 } }, - { "Ellesmere", { 1, 16, 1 } }, - { "Fiji", { 1, 16, 1 } }, - { "Hawaii", { 1, 64, 1 } }, - { "Oland", { 1, 256, 1 } }, - { "Pitcairn", { 1, 64, 1 } }, - { "Tahiti", { 1, 64, 1 } }, - { "Tonga", { 1, 32, 1 } }, - { "default", { 1, 64, 1 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 32, 4 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 2, 64, 4 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 1, 64, 4 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 4, 64, 4 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 1, 16, 2 } }, - { "default", { 4, 64, 4 } }, - } - }, - { // Intel accelerators - kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 1, 64, 1 } }, - { "default", { 1, 64, 1 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 1, 128, 1 } }, - { "GeForce GTX 480", { 1, 64, 1 } }, - { "GeForce GTX 670", { 1, 64, 1 } }, - { "default", { 1, 64, 1 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 1, 64, 1 } }, - } - }, - } -}; - -// ================================================================================================= -} // namespace database -} // namespace clblast diff --git a/src/database/kernels/xgemv_fast/xgemv_fast.hpp b/src/database/kernels/xgemv_fast/xgemv_fast.hpp new file mode 100644 index 00000000..5cc2ca6e --- /dev/null +++ b/src/database/kernels/xgemv_fast/xgemv_fast.hpp @@ -0,0 +1,14 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xgemv_Fast' kernels. +// +// ================================================================================================= + +#include "database/kernels/xgemv_fast/xgemv_fast_16.hpp" +#include "database/kernels/xgemv_fast/xgemv_fast_32.hpp" +#include "database/kernels/xgemv_fast/xgemv_fast_3232.hpp" +#include "database/kernels/xgemv_fast/xgemv_fast_64.hpp" +#include "database/kernels/xgemv_fast/xgemv_fast_6464.hpp" diff --git a/src/database/kernels/xgemv_fast/xgemv_fast_16.hpp b/src/database/kernels/xgemv_fast/xgemv_fast_16.hpp new file mode 100644 index 00000000..cad97e20 --- /dev/null +++ b/src/database/kernels/xgemv_fast/xgemv_fast_16.hpp @@ -0,0 +1,46 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xgemv_Fast16' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XgemvFastHalf = { + "XgemvFast", Precision::kHalf, {"VW2", "WGS2", "WPT2"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { + { Name{"AMD Radeon RX 480 "}, Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 4, 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xgemv_fast/xgemv_fast_32.hpp b/src/database/kernels/xgemv_fast/xgemv_fast_32.hpp new file mode 100644 index 00000000..1c0bc49d --- /dev/null +++ b/src/database/kernels/xgemv_fast/xgemv_fast_32.hpp @@ -0,0 +1,153 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xgemv_Fast32' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XgemvFastSingle = { + "XgemvFast", Precision::kSingle, {"VW2", "WGS2", "WPT2"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { + { Name{"AMD Radeon RX 480 "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Fiji", { + { Name{"AMD Radeon R9 Fury X "}, Params{ 1, 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"AMD Radeon R9 M370X Compute Engine "}, Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 256, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Hawaii", { + { Name{"AMD Radeon R9 290X "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Oland", { + { Name{"Oland "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Pitcairn", { + { Name{"AMD Radeon R9 270X "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tahiti", { + { Name{"AMD Radeon HD 7970 "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tonga", { + { Name{"AMD Radeon R9 380 "}, Params{ 1, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Turks", { + { Name{"AMD Radeon HD 6770M "}, Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Vancouver", { + { Name{"ATI Radeon HD 6750M "}, Params{ 2, 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "default", { + { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 1, 32, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 4, 128, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 4, 32, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz "}, Params{ 1, 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 2, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 4, 128, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics 530 "}, Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 2, 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile "}, Params{ 4, 128, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics IvyBridge M GT2 "}, Params{ 1, 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 2, 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Iris "}, Params{ 1, 128, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Iris Pro "}, Params{ 4, 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 256, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "default", { + { Name{"Intel(R) Many Integrated Core Acceleration Card "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "SM2.0", { + { Name{"GeForce GTX 480 "}, Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.0", { + { Name{"GRID K520 "}, Params{ 2, 256, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GT 650M "}, Params{ 2, 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 670 "}, Params{ 2, 256, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 680 "}, Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 128, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.5", { + { Name{"GeForce GTX TITAN "}, Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX TITAN Black "}, Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Tesla K20m "}, Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Tesla K40m "}, Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM5.0", { + { Name{"GeForce GTX 750 "}, Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 750 Ti "}, Params{ 2, 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM5.2", { + { Name{"GeForce GTX 980 "}, Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX TITAN X "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM6.1", { + { Name{"GeForce GTX 1070 "}, Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1080 "}, Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"TITAN X (Pascal) "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // QUALCOMM GPUs + kDeviceTypeGPU, "QUALCOMM", { + { "default", { + { Name{"QUALCOMM Adreno(TM) "}, Params{ 1, 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xgemv_fast/xgemv_fast_3232.hpp b/src/database/kernels/xgemv_fast/xgemv_fast_3232.hpp new file mode 100644 index 00000000..754d5a17 --- /dev/null +++ b/src/database/kernels/xgemv_fast/xgemv_fast_3232.hpp @@ -0,0 +1,125 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xgemv_Fast3232' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XgemvFastComplexSingle = { + "XgemvFast", Precision::kComplexSingle, {"VW2", "WGS2", "WPT2"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { + { Name{"AMD Radeon RX 480 "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Fiji", { + { Name{"AMD Radeon R9 Fury X "}, Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"AMD Radeon R9 M370X Compute Engine "}, Params{ 2, 256, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Hawaii", { + { Name{"AMD Radeon R9 290X "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Oland", { + { Name{"Oland "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Pitcairn", { + { Name{"AMD Radeon R9 270X "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tahiti", { + { Name{"AMD Radeon HD 7970 "}, Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tonga", { + { Name{"AMD Radeon R9 380 "}, Params{ 2, 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Turks", { + { Name{"AMD Radeon HD 6770M "}, Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Vancouver", { + { Name{"ATI Radeon HD 6750M "}, Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "default", { + { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 2, 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 1, 128, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 2, 128, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz "}, Params{ 4, 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 4, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics 530 "}, Params{ 2, 128, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 1, 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile "}, Params{ 2, 128, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics IvyBridge M GT2 "}, Params{ 1, 32, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Iris "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Iris Pro "}, Params{ 4, 128, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "default", { + { Name{"Intel(R) Many Integrated Core Acceleration Card "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "SM2.0", { + { Name{"GeForce GTX 480 "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.0", { + { Name{"GRID K520 "}, Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 670 "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 680 "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM6.1", { + { Name{"GeForce GTX 1070 "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xgemv_fast/xgemv_fast_64.hpp b/src/database/kernels/xgemv_fast/xgemv_fast_64.hpp new file mode 100644 index 00000000..fb08a3c4 --- /dev/null +++ b/src/database/kernels/xgemv_fast/xgemv_fast_64.hpp @@ -0,0 +1,122 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xgemv_Fast64' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XgemvFastDouble = { + "XgemvFast", Precision::kDouble, {"VW2", "WGS2", "WPT2"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { + { Name{"AMD Radeon RX 480 "}, Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Fiji", { + { Name{"AMD Radeon R9 Fury X "}, Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"AMD Radeon R9 M370X Compute Engine "}, Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Hawaii", { + { Name{"AMD Radeon R9 290X "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Oland", { + { Name{"Oland "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Pitcairn", { + { Name{"AMD Radeon R9 270X "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tahiti", { + { Name{"AMD Radeon HD 7970 "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tonga", { + { Name{"AMD Radeon R9 380 "}, Params{ 2, 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "default", { + { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 1, 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 4, 128, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz "}, Params{ 1, 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 1, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "default", { + { Name{"Intel(R) Many Integrated Core Acceleration Card "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "SM2.0", { + { Name{"GeForce GTX 480 "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.0", { + { Name{"GRID K520 "}, Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 670 "}, Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 680 "}, Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.5", { + { Name{"GeForce GTX TITAN "}, Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX TITAN Black "}, Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Tesla K20m "}, Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Tesla K40m "}, Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM5.0", { + { Name{"GeForce GTX 750 "}, Params{ 2, 256, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 750 Ti "}, Params{ 1, 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 256, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM5.2", { + { Name{"GeForce GTX 980 "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX TITAN X "}, Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM6.1", { + { Name{"GeForce GTX 1070 "}, Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1080 "}, Params{ 1, 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"TITAN X (Pascal) "}, Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xgemv_fast/xgemv_fast_6464.hpp b/src/database/kernels/xgemv_fast/xgemv_fast_6464.hpp new file mode 100644 index 00000000..03725979 --- /dev/null +++ b/src/database/kernels/xgemv_fast/xgemv_fast_6464.hpp @@ -0,0 +1,98 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xgemv_Fast6464' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XgemvFastComplexDouble = { + "XgemvFast", Precision::kComplexDouble, {"VW2", "WGS2", "WPT2"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { + { Name{"AMD Radeon RX 480 "}, Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Fiji", { + { Name{"AMD Radeon R9 Fury X "}, Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"AMD Radeon R9 M370X Compute Engine "}, Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Hawaii", { + { Name{"AMD Radeon R9 290X "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Oland", { + { Name{"Oland "}, Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Pitcairn", { + { Name{"AMD Radeon R9 270X "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tahiti", { + { Name{"AMD Radeon HD 7970 "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tonga", { + { Name{"AMD Radeon R9 380 "}, Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "default", { + { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 4, 32, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 2, 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 1, 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz "}, Params{ 4, 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 4, 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "default", { + { Name{"Intel(R) Many Integrated Core Acceleration Card "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "SM2.0", { + { Name{"GeForce GTX 480 "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.0", { + { Name{"GRID K520 "}, Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 670 "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xgemv_fast_rot.hpp b/src/database/kernels/xgemv_fast_rot.hpp deleted file mode 100644 index 7e5905e4..00000000 --- a/src/database/kernels/xgemv_fast_rot.hpp +++ /dev/null @@ -1,213 +0,0 @@ - -// ================================================================================================= -// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This -// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- -// width of 100 characters per line. -// -// Author(s): -// Database generator <database.py> -// -// This file populates the database with best-found tuning parameters for the 'Xgemv_Fast_Rot' kernels. -// -// ================================================================================================= - -namespace clblast { -namespace database { -// ================================================================================================= - -const Database::DatabaseEntry XgemvFastRotHalf = { - "XgemvFastRot", Precision::kHalf, {"VW3", "WGS3", "WPT3"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "Ellesmere", { 8, 32, 32 } }, - { "default", { 8, 32, 32 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics Skylake ULT GT2", { 8, 128, 32 } }, - { "default", { 8, 128, 32 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 8, 128, 32 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry XgemvFastRotSingle = { - "XgemvFastRot", Precision::kSingle, {"VW3", "WGS3", "WPT3"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 8, 64, 32 } }, - { "ATI Radeon HD 6750M", { 8, 128, 16 } }, - { "Ellesmere", { 8, 32, 32 } }, - { "Fiji", { 4, 32, 16 } }, - { "Tonga", { 8, 128, 32 } }, - { "Turks", { 8, 128, 16 } }, - { "default", { 8, 32, 32 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 32, 32 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 8, 128, 8 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 32, 32 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 8, 16, 8 } }, - { "default", { 8, 32, 32 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 8, 64, 32 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 4, 64, 16 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 2, 32, 16 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 4, 64, 16 } }, - { "Iris Pro", { 4, 16, 16 } }, - { "default", { 4, 64, 16 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GeForce GT 650M", { 8, 32, 16 } }, - { "GeForce GTX 1080", { 8, 32, 32 } }, - { "GeForce GTX 750 Ti", { 8, 32, 32 } }, - { "GeForce GTX TITAN", { 1, 16, 16 } }, - { "GeForce GTX TITAN Black", { 4, 128, 16 } }, - { "TITAN X (Pascal)", { 8, 64, 32 } }, - { "default", { 8, 32, 32 } }, - } - }, - { // QUALCOMM GPUs - kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 4, 64, 16 } }, - { "default", { 4, 64, 16 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 8, 32, 32 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry XgemvFastRotComplexSingle = { - "XgemvFastRot", Precision::kComplexSingle, {"VW3", "WGS3", "WPT3"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 8, 16, 16 } }, - { "ATI Radeon HD 6750M", { 8, 32, 8 } }, - { "Ellesmere", { 2, 32, 16 } }, - { "Fiji", { 4, 32, 32 } }, - { "Tonga", { 4, 32, 32 } }, - { "Turks", { 4, 32, 8 } }, - { "default", { 8, 16, 16 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 32, 32 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 8, 32, 8 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 32, 32 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 4, 16, 16 } }, - { "default", { 4, 32, 32 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 2, 16, 16 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 4, 128, 8 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 4, 32, 8 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 4, 64, 16 } }, - { "Iris Pro", { 4, 16, 16 } }, - { "default", { 2, 32, 8 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 4, 16, 16 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry XgemvFastRotDouble = { - "XgemvFastRot", Precision::kDouble, {"VW3", "WGS3", "WPT3"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 4, 16, 16 } }, - { "Ellesmere", { 4, 16, 16 } }, - { "Fiji", { 4, 32, 32 } }, - { "Tonga", { 4, 16, 16 } }, - { "default", { 4, 16, 16 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 32, 32 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 8, 16, 8 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 32, 32 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 8, 16, 8 } }, - { "default", { 8, 32, 32 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GeForce GTX 1080", { 8, 32, 32 } }, - { "GeForce GTX 750 Ti", { 4, 32, 16 } }, - { "GeForce GTX TITAN", { 1, 16, 16 } }, - { "GeForce GTX TITAN Black", { 1, 16, 16 } }, - { "TITAN X (Pascal)", { 8, 32, 32 } }, - { "default", { 4, 32, 16 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 4, 16, 16 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry XgemvFastRotComplexDouble = { - "XgemvFastRot", Precision::kComplexDouble, {"VW3", "WGS3", "WPT3"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 4, 32, 16 } }, - { "Ellesmere", { 4, 16, 16 } }, - { "Fiji", { 4, 32, 8 } }, - { "Tonga", { 4, 16, 8 } }, - { "default", { 8, 32, 16 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 2, 16, 16 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 4, 64, 4 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 2, 16, 16 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 8, 16, 16 } }, - { "default", { 8, 16, 16 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 4, 16, 16 } }, - } - }, - } -}; - -// ================================================================================================= -} // namespace database -} // namespace clblast diff --git a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot.hpp b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot.hpp new file mode 100644 index 00000000..7379eba9 --- /dev/null +++ b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot.hpp @@ -0,0 +1,14 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xgemv_Fast_Rot' kernels. +// +// ================================================================================================= + +#include "database/kernels/xgemv_fast_rot/xgemv_fast_rot_16.hpp" +#include "database/kernels/xgemv_fast_rot/xgemv_fast_rot_32.hpp" +#include "database/kernels/xgemv_fast_rot/xgemv_fast_rot_3232.hpp" +#include "database/kernels/xgemv_fast_rot/xgemv_fast_rot_64.hpp" +#include "database/kernels/xgemv_fast_rot/xgemv_fast_rot_6464.hpp" diff --git a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_16.hpp b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_16.hpp new file mode 100644 index 00000000..969df688 --- /dev/null +++ b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_16.hpp @@ -0,0 +1,45 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xgemv_Fast_Rot16' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XgemvFastRotHalf = { + "XgemvFastRot", Precision::kHalf, {"VW3", "WGS3", "WPT3"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { + { Name{"AMD Radeon RX 480 "}, Params{ 8, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 8, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 8, 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 8, 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_32.hpp b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_32.hpp new file mode 100644 index 00000000..ce80a9f7 --- /dev/null +++ b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_32.hpp @@ -0,0 +1,110 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xgemv_Fast_Rot32' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XgemvFastRotSingle = { + "XgemvFastRot", Precision::kSingle, {"VW3", "WGS3", "WPT3"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { + { Name{"AMD Radeon RX 480 "}, Params{ 8, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Fiji", { + { Name{"AMD Radeon R9 Fury X "}, Params{ 4, 32, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"AMD Radeon R9 M370X Compute Engine "}, Params{ 8, 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tonga", { + { Name{"AMD Radeon R9 380 "}, Params{ 8, 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Turks", { + { Name{"AMD Radeon HD 6770M "}, Params{ 8, 128, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 128, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Vancouver", { + { Name{"ATI Radeon HD 6750M "}, Params{ 8, 128, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 128, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 8, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "default", { + { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 4, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 8, 128, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 4, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz "}, Params{ 8, 16, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 8, 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile "}, Params{ 4, 64, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics IvyBridge M GT2 "}, Params{ 2, 32, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 4, 64, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Iris Pro "}, Params{ 4, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 4, 64, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "SM3.0", { + { Name{"GeForce GT 650M "}, Params{ 8, 32, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 32, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.5", { + { Name{"GeForce GTX TITAN "}, Params{ 1, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX TITAN Black "}, Params{ 4, 128, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM5.0", { + { Name{"GeForce GTX 750 Ti "}, Params{ 8, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM6.1", { + { Name{"GeForce GTX 1080 "}, Params{ 8, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"TITAN X (Pascal) "}, Params{ 8, 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 8, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // QUALCOMM GPUs + kDeviceTypeGPU, "QUALCOMM", { + { "default", { + { Name{"QUALCOMM Adreno(TM) "}, Params{ 4, 64, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 4, 64, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 8, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_3232.hpp b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_3232.hpp new file mode 100644 index 00000000..f047b91a --- /dev/null +++ b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_3232.hpp @@ -0,0 +1,77 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xgemv_Fast_Rot3232' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XgemvFastRotComplexSingle = { + "XgemvFastRot", Precision::kComplexSingle, {"VW3", "WGS3", "WPT3"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { + { Name{"AMD Radeon RX 480 "}, Params{ 2, 32, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 32, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Fiji", { + { Name{"AMD Radeon R9 Fury X "}, Params{ 4, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"AMD Radeon R9 M370X Compute Engine "}, Params{ 8, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tonga", { + { Name{"AMD Radeon R9 380 "}, Params{ 4, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 4, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Turks", { + { Name{"AMD Radeon HD 6770M "}, Params{ 4, 32, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 4, 32, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Vancouver", { + { Name{"ATI Radeon HD 6750M "}, Params{ 8, 32, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 32, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 8, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "default", { + { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 4, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 8, 32, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 4, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz "}, Params{ 4, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 4, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 2, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile "}, Params{ 4, 128, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics IvyBridge M GT2 "}, Params{ 4, 32, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 4, 64, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Iris Pro "}, Params{ 4, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 32, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 4, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_64.hpp b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_64.hpp new file mode 100644 index 00000000..5f09ef95 --- /dev/null +++ b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_64.hpp @@ -0,0 +1,78 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xgemv_Fast_Rot64' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XgemvFastRotDouble = { + "XgemvFastRot", Precision::kDouble, {"VW3", "WGS3", "WPT3"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { + { Name{"AMD Radeon RX 480 "}, Params{ 4, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 4, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Fiji", { + { Name{"AMD Radeon R9 Fury X "}, Params{ 4, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"AMD Radeon R9 M370X Compute Engine "}, Params{ 4, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 4, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tonga", { + { Name{"AMD Radeon R9 380 "}, Params{ 4, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 4, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 4, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "default", { + { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 4, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 8, 16, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 4, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz "}, Params{ 8, 16, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "SM3.5", { + { Name{"GeForce GTX TITAN "}, Params{ 1, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX TITAN Black "}, Params{ 1, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM5.0", { + { Name{"GeForce GTX 750 Ti "}, Params{ 4, 32, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 4, 32, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM6.1", { + { Name{"GeForce GTX 1080 "}, Params{ 8, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"TITAN X (Pascal) "}, Params{ 8, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 4, 32, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 4, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_6464.hpp b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_6464.hpp new file mode 100644 index 00000000..10d3ee43 --- /dev/null +++ b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_6464.hpp @@ -0,0 +1,57 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xgemv_Fast_Rot6464' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XgemvFastRotComplexDouble = { + "XgemvFastRot", Precision::kComplexDouble, {"VW3", "WGS3", "WPT3"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { + { Name{"AMD Radeon RX 480 "}, Params{ 4, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 4, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Fiji", { + { Name{"AMD Radeon R9 Fury X "}, Params{ 4, 32, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"AMD Radeon R9 M370X Compute Engine "}, Params{ 4, 32, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 4, 32, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tonga", { + { Name{"AMD Radeon R9 380 "}, Params{ 4, 16, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 4, 16, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 8, 32, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "default", { + { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 2, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 4, 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 2, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz "}, Params{ 8, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 4, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xger.hpp b/src/database/kernels/xger.hpp deleted file mode 100644 index e17396f6..00000000 --- a/src/database/kernels/xger.hpp +++ /dev/null @@ -1,316 +0,0 @@ - -// ================================================================================================= -// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This -// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- -// width of 100 characters per line. -// -// Author(s): -// Database generator <database.py> -// -// This file populates the database with best-found tuning parameters for the 'Xger' kernels. -// -// ================================================================================================= - -namespace clblast { -namespace database { -// ================================================================================================= - -const Database::DatabaseEntry XgerHalf = { - "Xger", Precision::kHalf, {"WGS1", "WGS2", "WPT"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "Ellesmere", { 64, 1, 2 } }, - { "default", { 64, 1, 2 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 256, 1, 2 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 64, 1, 4 } }, - { "default", { 4, 8, 2 } }, - } - }, - { // QUALCOMM GPUs - kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 64, 4, 2 } }, - { "default", { 64, 4, 2 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 64, 1, 2 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry XgerSingle = { - "Xger", Precision::kSingle, {"WGS1", "WGS2", "WPT"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 256, 1, 1 } }, - { "ATI Radeon HD 6750M", { 16, 16, 4 } }, - { "Ellesmere", { 64, 4, 2 } }, - { "Fiji", { 256, 1, 1 } }, - { "Hawaii", { 64, 2, 1 } }, - { "Oland", { 32, 4, 2 } }, - { "Pitcairn", { 64, 1, 1 } }, - { "Tahiti", { 256, 1, 1 } }, - { "Tonga", { 256, 1, 2 } }, - { "Turks", { 64, 4, 2 } }, - { "default", { 16, 16, 1 } }, - } - }, - { // ARM GPUs - kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 64, 4, 4 } }, - { "default", { 64, 4, 4 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 4, 4 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 128, 2, 4 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 256, 16, 4 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 256, 4, 4 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 128, 1, 4 } }, - { "default", { 128, 8, 4 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 530", { 32, 1, 2 } }, - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 256, 2, 2 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 128, 1, 2 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 64, 1, 4 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 32, 4, 4 } }, - { "Iris Pro", { 64, 1, 4 } }, - { "default", { 32, 4, 2 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 128, 1, 2 } }, - { "GeForce GT 650M", { 32, 16, 4 } }, - { "GeForce GTX 1070", { 512, 1, 1 } }, - { "GeForce GTX 1080", { 16, 4, 1 } }, - { "GeForce GTX 480", { 256, 1, 4 } }, - { "GeForce GTX 670", { 32, 8, 2 } }, - { "GeForce GTX 680", { 128, 1, 4 } }, - { "GeForce GTX 750", { 64, 16, 4 } }, - { "GeForce GTX 750 Ti", { 64, 1, 2 } }, - { "GeForce GTX TITAN", { 32, 4, 2 } }, - { "GeForce GTX TITAN Black", { 32, 4, 2 } }, - { "TITAN X (Pascal)", { 512, 2, 1 } }, - { "default", { 128, 1, 2 } }, - } - }, - { // QUALCOMM GPUs - kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 128, 1, 2 } }, - { "default", { 128, 1, 2 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 32, 4, 2 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry XgerComplexSingle = { - "Xger", Precision::kComplexSingle, {"WGS1", "WGS2", "WPT"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 64, 4, 1 } }, - { "ATI Radeon HD 6750M", { 16, 16, 1 } }, - { "Ellesmere", { 16, 8, 2 } }, - { "Fiji", { 128, 2, 1 } }, - { "Hawaii", { 64, 1, 2 } }, - { "Oland", { 4, 8, 1 } }, - { "Pitcairn", { 128, 2, 1 } }, - { "Tahiti", { 64, 2, 1 } }, - { "Tonga", { 64, 1, 1 } }, - { "Turks", { 128, 2, 1 } }, - { "default", { 128, 2, 1 } }, - } - }, - { // ARM GPUs - kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 128, 1, 1 } }, - { "default", { 128, 1, 1 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 128, 2, 4 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 256, 1, 4 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 256, 8, 4 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 256, 2, 4 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 512, 4, 2 } }, - { "default", { 256, 2, 4 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 530", { 32, 1, 2 } }, - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 128, 2, 1 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 512, 1, 1 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 256, 1, 2 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 16, 1, 1 } }, - { "Iris Pro", { 16, 2, 4 } }, - { "default", { 128, 2, 2 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 64, 4, 2 } }, - { "GeForce GTX 1070", { 16, 64, 2 } }, - { "GeForce GTX 1080", { 32, 2, 1 } }, - { "GeForce GTX 480", { 128, 2, 2 } }, - { "GeForce GTX 670", { 16, 32, 2 } }, - { "GeForce GTX 680", { 32, 4, 2 } }, - { "GeForce GTX 750", { 32, 16, 4 } }, - { "GeForce GTX 750 Ti", { 32, 8, 2 } }, - { "GeForce GTX TITAN", { 16, 16, 2 } }, - { "GeForce GTX TITAN Black", { 16, 16, 2 } }, - { "TITAN X (Pascal)", { 32, 2, 1 } }, - { "default", { 128, 2, 2 } }, - } - }, - { // QUALCOMM GPUs - kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 64, 1, 4 } }, - { "default", { 64, 1, 4 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 64, 2, 2 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry XgerDouble = { - "Xger", Precision::kDouble, {"WGS1", "WGS2", "WPT"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 32, 4, 1 } }, - { "Ellesmere", { 64, 1, 4 } }, - { "Fiji", { 256, 1, 2 } }, - { "Hawaii", { 32, 4, 2 } }, - { "Oland", { 128, 1, 2 } }, - { "Pitcairn", { 64, 1, 1 } }, - { "Tahiti", { 64, 2, 1 } }, - { "Tonga", { 8, 16, 2 } }, - { "default", { 128, 2, 1 } }, - } - }, - { // ARM GPUs - kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 64, 4, 1 } }, - { "default", { 64, 4, 1 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 256, 1, 4 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 512, 16, 1 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 256, 1, 4 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 256, 4, 4 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 512, 8, 2 } }, - { "default", { 256, 1, 4 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 128, 8, 2 } }, - { "GeForce GTX 1070", { 32, 8, 1 } }, - { "GeForce GTX 1080", { 32, 2, 1 } }, - { "GeForce GTX 480", { 32, 4, 2 } }, - { "GeForce GTX 670", { 32, 32, 2 } }, - { "GeForce GTX 680", { 128, 4, 2 } }, - { "GeForce GTX 750", { 256, 2, 2 } }, - { "GeForce GTX 750 Ti", { 32, 16, 1 } }, - { "GeForce GTX TITAN", { 16, 8, 2 } }, - { "GeForce GTX TITAN Black", { 32, 4, 2 } }, - { "TITAN X (Pascal)", { 32, 2, 1 } }, - { "default", { 128, 1, 2 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 128, 1, 2 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry XgerComplexDouble = { - "Xger", Precision::kComplexDouble, {"WGS1", "WGS2", "WPT"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 64, 1, 1 } }, - { "Ellesmere", { 8, 16, 1 } }, - { "Fiji", { 64, 4, 2 } }, - { "Hawaii", { 128, 1, 1 } }, - { "Oland", { 16, 16, 2 } }, - { "Pitcairn", { 64, 4, 1 } }, - { "Tahiti", { 32, 4, 1 } }, - { "Tonga", { 16, 4, 1 } }, - { "default", { 32, 4, 1 } }, - } - }, - { // ARM GPUs - kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 64, 2, 4 } }, - { "default", { 64, 2, 4 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 128, 4, 4 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 512, 4, 2 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 256, 8, 4 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 512, 2, 2 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 256, 1, 2 } }, - { "default", { 256, 2, 2 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 16, 8, 2 } }, - { "GeForce GTX 1070", { 8, 128, 1 } }, - { "GeForce GTX 1080", { 8, 4, 1 } }, - { "GeForce GTX 480", { 64, 2, 2 } }, - { "GeForce GTX 670", { 8, 16, 2 } }, - { "GeForce GTX 680", { 8, 16, 1 } }, - { "GeForce GTX 750", { 8, 32, 4 } }, - { "GeForce GTX 750 Ti", { 32, 8, 2 } }, - { "GeForce GTX TITAN", { 32, 4, 2 } }, - { "GeForce GTX TITAN Black", { 16, 16, 2 } }, - { "TITAN X (Pascal)", { 4, 8, 1 } }, - { "default", { 16, 8, 2 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 64, 2, 2 } }, - } - }, - } -}; - -// ================================================================================================= -} // namespace database -} // namespace clblast diff --git a/src/database/kernels/xger/xger.hpp b/src/database/kernels/xger/xger.hpp new file mode 100644 index 00000000..284d1fc6 --- /dev/null +++ b/src/database/kernels/xger/xger.hpp @@ -0,0 +1,14 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xger' kernels. +// +// ================================================================================================= + +#include "database/kernels/xger/xger_16.hpp" +#include "database/kernels/xger/xger_32.hpp" +#include "database/kernels/xger/xger_3232.hpp" +#include "database/kernels/xger/xger_64.hpp" +#include "database/kernels/xger/xger_6464.hpp" diff --git a/src/database/kernels/xger/xger_16.hpp b/src/database/kernels/xger/xger_16.hpp new file mode 100644 index 00000000..8d261835 --- /dev/null +++ b/src/database/kernels/xger/xger_16.hpp @@ -0,0 +1,54 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xger16' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XgerHalf = { + "Xger", Precision::kHalf, {"WGS1", "WGS2", "WPT"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { + { Name{"AMD Radeon RX 480 "}, Params{ 64, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 64, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 256, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 64, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 4, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // QUALCOMM GPUs + kDeviceTypeGPU, "QUALCOMM", { + { "default", { + { Name{"QUALCOMM Adreno(TM) "}, Params{ 64, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 64, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xger/xger_32.hpp b/src/database/kernels/xger/xger_32.hpp new file mode 100644 index 00000000..e4e2024c --- /dev/null +++ b/src/database/kernels/xger/xger_32.hpp @@ -0,0 +1,145 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xger32' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XgerSingle = { + "Xger", Precision::kSingle, {"WGS1", "WGS2", "WPT"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { + { Name{"AMD Radeon RX 480 "}, Params{ 64, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Fiji", { + { Name{"AMD Radeon R9 Fury X "}, Params{ 256, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"AMD Radeon R9 M370X Compute Engine "}, Params{ 256, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Hawaii", { + { Name{"AMD Radeon R9 290X "}, Params{ 64, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Oland", { + { Name{"Oland "}, Params{ 32, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Pitcairn", { + { Name{"AMD Radeon R9 270X "}, Params{ 64, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tahiti", { + { Name{"AMD Radeon HD 7970 "}, Params{ 256, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tonga", { + { Name{"AMD Radeon R9 380 "}, Params{ 256, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Turks", { + { Name{"AMD Radeon HD 6770M "}, Params{ 64, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Vancouver", { + { Name{"ATI Radeon HD 6750M "}, Params{ 16, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 16, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "default", { + { Name{"Mali-T628 "}, Params{ 64, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "default", { + { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 32, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 128, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 256, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 256, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz "}, Params{ 128, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics 530 "}, Params{ 32, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 256, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile "}, Params{ 128, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics IvyBridge M GT2 "}, Params{ 64, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 32, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Iris Pro "}, Params{ 64, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "SM2.0", { + { Name{"GeForce GTX 480 "}, Params{ 256, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.0", { + { Name{"GRID K520 "}, Params{ 128, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GT 650M "}, Params{ 32, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 670 "}, Params{ 32, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 680 "}, Params{ 128, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.5", { + { Name{"GeForce GTX TITAN "}, Params{ 32, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX TITAN Black "}, Params{ 32, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM5.0", { + { Name{"GeForce GTX 750 "}, Params{ 64, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 750 Ti "}, Params{ 64, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM6.1", { + { Name{"GeForce GTX 1070 "}, Params{ 512, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1080 "}, Params{ 16, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"TITAN X (Pascal) "}, Params{ 512, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 512, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 128, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // QUALCOMM GPUs + kDeviceTypeGPU, "QUALCOMM", { + { "default", { + { Name{"QUALCOMM Adreno(TM) "}, Params{ 128, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 32, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xger/xger_3232.hpp b/src/database/kernels/xger/xger_3232.hpp new file mode 100644 index 00000000..4b67b9da --- /dev/null +++ b/src/database/kernels/xger/xger_3232.hpp @@ -0,0 +1,144 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xger3232' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XgerComplexSingle = { + "Xger", Precision::kComplexSingle, {"WGS1", "WGS2", "WPT"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { + { Name{"AMD Radeon RX 480 "}, Params{ 16, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Fiji", { + { Name{"AMD Radeon R9 Fury X "}, Params{ 128, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"AMD Radeon R9 M370X Compute Engine "}, Params{ 64, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Hawaii", { + { Name{"AMD Radeon R9 290X "}, Params{ 64, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Oland", { + { Name{"Oland "}, Params{ 4, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 4, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Pitcairn", { + { Name{"AMD Radeon R9 270X "}, Params{ 128, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tahiti", { + { Name{"AMD Radeon HD 7970 "}, Params{ 64, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tonga", { + { Name{"AMD Radeon R9 380 "}, Params{ 64, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Turks", { + { Name{"AMD Radeon HD 6770M "}, Params{ 128, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Vancouver", { + { Name{"ATI Radeon HD 6750M "}, Params{ 16, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 128, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "default", { + { Name{"Mali-T628 "}, Params{ 128, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "default", { + { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 128, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 256, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 256, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 256, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz "}, Params{ 512, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics 530 "}, Params{ 32, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 128, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile "}, Params{ 512, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics IvyBridge M GT2 "}, Params{ 256, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Iris Pro "}, Params{ 16, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "SM2.0", { + { Name{"GeForce GTX 480 "}, Params{ 128, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.0", { + { Name{"GRID K520 "}, Params{ 64, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 670 "}, Params{ 16, 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 680 "}, Params{ 32, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.5", { + { Name{"GeForce GTX TITAN "}, Params{ 16, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX TITAN Black "}, Params{ 16, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM5.0", { + { Name{"GeForce GTX 750 "}, Params{ 32, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 750 Ti "}, Params{ 32, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM6.1", { + { Name{"GeForce GTX 1070 "}, Params{ 16, 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1080 "}, Params{ 32, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"TITAN X (Pascal) "}, Params{ 32, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 128, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // QUALCOMM GPUs + kDeviceTypeGPU, "QUALCOMM", { + { "default", { + { Name{"QUALCOMM Adreno(TM) "}, Params{ 64, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 64, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xger/xger_64.hpp b/src/database/kernels/xger/xger_64.hpp new file mode 100644 index 00000000..cea7b076 --- /dev/null +++ b/src/database/kernels/xger/xger_64.hpp @@ -0,0 +1,115 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xger64' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XgerDouble = { + "Xger", Precision::kDouble, {"WGS1", "WGS2", "WPT"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { + { Name{"AMD Radeon RX 480 "}, Params{ 64, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Fiji", { + { Name{"AMD Radeon R9 Fury X "}, Params{ 256, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"AMD Radeon R9 M370X Compute Engine "}, Params{ 32, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Hawaii", { + { Name{"AMD Radeon R9 290X "}, Params{ 32, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Oland", { + { Name{"Oland "}, Params{ 128, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Pitcairn", { + { Name{"AMD Radeon R9 270X "}, Params{ 64, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tahiti", { + { Name{"AMD Radeon HD 7970 "}, Params{ 64, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tonga", { + { Name{"AMD Radeon R9 380 "}, Params{ 8, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 128, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "default", { + { Name{"Mali-T628 "}, Params{ 64, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "default", { + { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 256, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 512, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 256, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 256, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz "}, Params{ 512, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "SM2.0", { + { Name{"GeForce GTX 480 "}, Params{ 32, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.0", { + { Name{"GRID K520 "}, Params{ 128, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 670 "}, Params{ 32, 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 680 "}, Params{ 128, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.5", { + { Name{"GeForce GTX TITAN "}, Params{ 16, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX TITAN Black "}, Params{ 32, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM5.0", { + { Name{"GeForce GTX 750 "}, Params{ 256, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 750 Ti "}, Params{ 32, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM6.1", { + { Name{"GeForce GTX 1070 "}, Params{ 32, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1080 "}, Params{ 32, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"TITAN X (Pascal) "}, Params{ 32, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 128, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 128, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xger/xger_6464.hpp b/src/database/kernels/xger/xger_6464.hpp new file mode 100644 index 00000000..841c5c8e --- /dev/null +++ b/src/database/kernels/xger/xger_6464.hpp @@ -0,0 +1,115 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xger6464' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XgerComplexDouble = { + "Xger", Precision::kComplexDouble, {"WGS1", "WGS2", "WPT"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { + { Name{"AMD Radeon RX 480 "}, Params{ 8, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Fiji", { + { Name{"AMD Radeon R9 Fury X "}, Params{ 64, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"AMD Radeon R9 M370X Compute Engine "}, Params{ 64, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Hawaii", { + { Name{"AMD Radeon R9 290X "}, Params{ 128, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Oland", { + { Name{"Oland "}, Params{ 16, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Pitcairn", { + { Name{"AMD Radeon R9 270X "}, Params{ 64, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tahiti", { + { Name{"AMD Radeon HD 7970 "}, Params{ 32, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "Tonga", { + { Name{"AMD Radeon R9 380 "}, Params{ 16, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 32, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "default", { + { Name{"Mali-T628 "}, Params{ 64, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "default", { + { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 128, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 512, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 256, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 512, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz "}, Params{ 256, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "SM2.0", { + { Name{"GeForce GTX 480 "}, Params{ 64, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.0", { + { Name{"GRID K520 "}, Params{ 16, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 670 "}, Params{ 8, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 680 "}, Params{ 8, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM3.5", { + { Name{"GeForce GTX TITAN "}, Params{ 32, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX TITAN Black "}, Params{ 16, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM5.0", { + { Name{"GeForce GTX 750 "}, Params{ 8, 32, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 750 Ti "}, Params{ 32, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "SM6.1", { + { Name{"GeForce GTX 1070 "}, Params{ 8, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1080 "}, Params{ 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"TITAN X (Pascal) "}, Params{ 4, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 16, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 64, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xtrsv.hpp b/src/database/kernels/xtrsv.hpp index 6633b8b7..2d6afbea 100644 --- a/src/database/kernels/xtrsv.hpp +++ b/src/database/kernels/xtrsv.hpp @@ -15,11 +15,11 @@ namespace clblast { namespace database { // ================================================================================================= -const Database::DatabaseEntry XtrsvHalf = { +const DatabaseEntry XtrsvHalf = { "Xtrsv", Precision::kHalf, {"TRSV_BLOCK_SIZE"}, { { // Default kDeviceTypeAll, "default", { - { "default", { 32 } }, + { "default", { { kDeviceNameDefault, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } }, } }, } @@ -27,11 +27,11 @@ const Database::DatabaseEntry XtrsvHalf = { // ================================================================================================= -const Database::DatabaseEntry XtrsvSingle = { +const DatabaseEntry XtrsvSingle = { "Xtrsv", Precision::kSingle, {"TRSV_BLOCK_SIZE"}, { { // Default kDeviceTypeAll, "default", { - { "default", { 32 } }, + { "default", { { kDeviceNameDefault, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } }, } }, } @@ -39,11 +39,11 @@ const Database::DatabaseEntry XtrsvSingle = { // ================================================================================================= -const Database::DatabaseEntry XtrsvComplexSingle = { +const DatabaseEntry XtrsvComplexSingle = { "Xtrsv", Precision::kComplexSingle, {"TRSV_BLOCK_SIZE"}, { { // Default kDeviceTypeAll, "default", { - { "default", { 32 } }, + { "default", { { kDeviceNameDefault, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } }, } }, } @@ -51,11 +51,11 @@ const Database::DatabaseEntry XtrsvComplexSingle = { // ================================================================================================= -const Database::DatabaseEntry XtrsvDouble = { +const DatabaseEntry XtrsvDouble = { "Xtrsv", Precision::kDouble, {"TRSV_BLOCK_SIZE"}, { { // Default kDeviceTypeAll, "default", { - { "default", { 32 } }, + { "default", { { kDeviceNameDefault, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } }, } }, } @@ -63,11 +63,11 @@ const Database::DatabaseEntry XtrsvDouble = { // ================================================================================================= -const Database::DatabaseEntry XtrsvComplexDouble = { +const DatabaseEntry XtrsvComplexDouble = { "Xtrsv", Precision::kComplexDouble, {"TRSV_BLOCK_SIZE"}, { { // Default kDeviceTypeAll, "default", { - { "default", { 32 } }, + { "default", { { kDeviceNameDefault, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } }, } }, } |