summaryrefslogtreecommitdiff
path: root/src/database/kernels
diff options
context:
space:
mode:
authorCedric Nugteren <web@cedricnugteren.nl>2017-09-06 21:50:42 +0200
committerCedric Nugteren <web@cedricnugteren.nl>2017-09-06 21:50:42 +0200
commit20da5e33a86eda746c17cbdb7bfd295d9f92f074 (patch)
treed35e7091ddc8bbd81d581c4bd49468c6329111fd /src/database/kernels
parentbb947890dec90712c92028c20234eafd48e6fa3e (diff)
Split the database files over multiple directories and files; first step towards separate compilation
Diffstat (limited to 'src/database/kernels')
-rw-r--r--src/database/kernels/copy.hpp354
-rw-r--r--src/database/kernels/copy/copy.hpp14
-rw-r--r--src/database/kernels/copy/copy_16.hpp43
-rw-r--r--src/database/kernels/copy/copy_32.hpp101
-rw-r--r--src/database/kernels/copy/copy_3232.hpp92
-rw-r--r--src/database/kernels/copy/copy_64.hpp80
-rw-r--r--src/database/kernels/copy/copy_6464.hpp80
-rw-r--r--src/database/kernels/invert.hpp10
-rw-r--r--src/database/kernels/pad.hpp362
-rw-r--r--src/database/kernels/pad/pad.hpp14
-rw-r--r--src/database/kernels/pad/pad_16.hpp43
-rw-r--r--src/database/kernels/pad/pad_32.hpp101
-rw-r--r--src/database/kernels/pad/pad_3232.hpp100
-rw-r--r--src/database/kernels/pad/pad_64.hpp80
-rw-r--r--src/database/kernels/pad/pad_6464.hpp80
-rw-r--r--src/database/kernels/padtranspose.hpp361
-rw-r--r--src/database/kernels/padtranspose/padtranspose.hpp14
-rw-r--r--src/database/kernels/padtranspose/padtranspose_16.hpp43
-rw-r--r--src/database/kernels/padtranspose/padtranspose_32.hpp100
-rw-r--r--src/database/kernels/padtranspose/padtranspose_3232.hpp100
-rw-r--r--src/database/kernels/padtranspose/padtranspose_64.hpp80
-rw-r--r--src/database/kernels/padtranspose/padtranspose_6464.hpp80
-rw-r--r--src/database/kernels/transpose.hpp350
-rw-r--r--src/database/kernels/transpose/transpose.hpp14
-rw-r--r--src/database/kernels/transpose/transpose_16.hpp43
-rw-r--r--src/database/kernels/transpose/transpose_32.hpp101
-rw-r--r--src/database/kernels/transpose/transpose_3232.hpp94
-rw-r--r--src/database/kernels/transpose/transpose_64.hpp80
-rw-r--r--src/database/kernels/transpose/transpose_6464.hpp74
-rw-r--r--src/database/kernels/xaxpy.hpp362
-rw-r--r--src/database/kernels/xaxpy/xaxpy.hpp14
-rw-r--r--src/database/kernels/xaxpy/xaxpy_16.hpp43
-rw-r--r--src/database/kernels/xaxpy/xaxpy_32.hpp101
-rw-r--r--src/database/kernels/xaxpy/xaxpy_3232.hpp100
-rw-r--r--src/database/kernels/xaxpy/xaxpy_64.hpp80
-rw-r--r--src/database/kernels/xaxpy/xaxpy_6464.hpp80
-rw-r--r--src/database/kernels/xdot.hpp292
-rw-r--r--src/database/kernels/xdot/xdot.hpp14
-rw-r--r--src/database/kernels/xdot/xdot_16.hpp43
-rw-r--r--src/database/kernels/xdot/xdot_32.hpp83
-rw-r--r--src/database/kernels/xdot/xdot_3232.hpp82
-rw-r--r--src/database/kernels/xdot/xdot_64.hpp63
-rw-r--r--src/database/kernels/xdot/xdot_6464.hpp63
-rw-r--r--src/database/kernels/xgemm.hpp348
-rw-r--r--src/database/kernels/xgemm/xgemm.hpp14
-rw-r--r--src/database/kernels/xgemm/xgemm_16.hpp36
-rw-r--r--src/database/kernels/xgemm/xgemm_32.hpp101
-rw-r--r--src/database/kernels/xgemm/xgemm_3232.hpp94
-rw-r--r--src/database/kernels/xgemm/xgemm_64.hpp80
-rw-r--r--src/database/kernels/xgemm/xgemm_6464.hpp79
-rw-r--r--src/database/kernels/xgemm_direct.hpp218
-rw-r--r--src/database/kernels/xgemm_direct/xgemm_direct.hpp14
-rw-r--r--src/database/kernels/xgemm_direct/xgemm_direct_16.hpp36
-rw-r--r--src/database/kernels/xgemm_direct/xgemm_direct_32.hpp66
-rw-r--r--src/database/kernels/xgemm_direct/xgemm_direct_3232.hpp58
-rw-r--r--src/database/kernels/xgemm_direct/xgemm_direct_64.hpp50
-rw-r--r--src/database/kernels/xgemm_direct/xgemm_direct_6464.hpp50
-rw-r--r--src/database/kernels/xgemv.hpp306
-rw-r--r--src/database/kernels/xgemv/xgemv.hpp14
-rw-r--r--src/database/kernels/xgemv/xgemv_16.hpp37
-rw-r--r--src/database/kernels/xgemv/xgemv_32.hpp94
-rw-r--r--src/database/kernels/xgemv/xgemv_3232.hpp83
-rw-r--r--src/database/kernels/xgemv/xgemv_64.hpp73
-rw-r--r--src/database/kernels/xgemv/xgemv_6464.hpp61
-rw-r--r--src/database/kernels/xgemv_fast.hpp300
-rw-r--r--src/database/kernels/xgemv_fast/xgemv_fast.hpp14
-rw-r--r--src/database/kernels/xgemv_fast/xgemv_fast_16.hpp37
-rw-r--r--src/database/kernels/xgemv_fast/xgemv_fast_32.hpp94
-rw-r--r--src/database/kernels/xgemv_fast/xgemv_fast_3232.hpp77
-rw-r--r--src/database/kernels/xgemv_fast/xgemv_fast_64.hpp73
-rw-r--r--src/database/kernels/xgemv_fast/xgemv_fast_6464.hpp61
-rw-r--r--src/database/kernels/xgemv_fast_rot.hpp213
-rw-r--r--src/database/kernels/xgemv_fast_rot/xgemv_fast_rot.hpp14
-rw-r--r--src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_16.hpp36
-rw-r--r--src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_32.hpp71
-rw-r--r--src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_3232.hpp54
-rw-r--r--src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_64.hpp52
-rw-r--r--src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_6464.hpp42
-rw-r--r--src/database/kernels/xger.hpp316
-rw-r--r--src/database/kernels/xger/xger.hpp14
-rw-r--r--src/database/kernels/xger/xger_16.hpp43
-rw-r--r--src/database/kernels/xger/xger_32.hpp89
-rw-r--r--src/database/kernels/xger/xger_3232.hpp88
-rw-r--r--src/database/kernels/xger/xger_64.hpp69
-rw-r--r--src/database/kernels/xger/xger_6464.hpp69
-rw-r--r--src/database/kernels/xtrsv.hpp10
86 files changed, 4464 insertions, 3792 deletions
diff --git a/src/database/kernels/copy.hpp b/src/database/kernels/copy.hpp
deleted file mode 100644
index e5defb32..00000000
--- a/src/database/kernels/copy.hpp
+++ /dev/null
@@ -1,354 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-// Database generator <database.py>
-//
-// This file populates the database with best-found tuning parameters for the 'Copy' kernels.
-//
-// =================================================================================================
-
-namespace clblast {
-namespace database {
-// =================================================================================================
-
-const Database::DatabaseEntry CopyHalf = {
- "Copy", Precision::kHalf, {"COPY_DIMX", "COPY_DIMY", "COPY_VW", "COPY_WPT"}, {
- { // AMD GPUs
- kDeviceTypeGPU, "AMD", {
- { "Ellesmere", { 16, 8, 4, 4 } },
- { "default", { 16, 8, 4, 4 } },
- }
- },
- { // Intel GPUs
- kDeviceTypeGPU, "Intel", {
- { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 8, 16, 8, 4 } },
- { "Intel(R) HD Graphics Skylake ULT GT2", { 8, 32, 4, 8 } },
- { "default", { 8, 32, 4, 8 } },
- }
- },
- { // QUALCOMM GPUs
- kDeviceTypeGPU, "QUALCOMM", {
- { "QUALCOMM Adreno(TM)", { 32, 8, 8, 1 } },
- { "default", { 32, 8, 8, 1 } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { 16, 8, 4, 4 } },
- }
- },
- }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry CopySingle = {
- "Copy", Precision::kSingle, {"COPY_DIMX", "COPY_DIMY", "COPY_VW", "COPY_WPT"}, {
- { // AMD GPUs
- kDeviceTypeGPU, "AMD", {
- { "AMD Radeon R9 M370X Compute Engine", { 32, 8, 4, 1 } },
- { "ATI Radeon HD 6750M", { 16, 8, 2, 1 } },
- { "Ellesmere", { 8, 8, 4, 8 } },
- { "Fiji", { 16, 16, 1, 2 } },
- { "Hawaii", { 32, 8, 2, 2 } },
- { "Oland", { 32, 8, 4, 2 } },
- { "Pitcairn", { 8, 16, 4, 1 } },
- { "Tahiti", { 32, 8, 2, 2 } },
- { "Tonga", { 32, 8, 4, 4 } },
- { "Turks", { 8, 8, 4, 2 } },
- { "default", { 8, 16, 4, 1 } },
- }
- },
- { // ARM GPUs
- kDeviceTypeGPU, "ARM", {
- { "Mali-T628", { 32, 8, 2, 4 } },
- { "default", { 32, 8, 2, 4 } },
- }
- },
- { // Intel CPUs
- kDeviceTypeCPU, "Intel", {
- { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 16, 8, 1 } },
- { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 16, 8, 2 } },
- { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 8, 4, 4 } },
- { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 16, 8, 1 } },
- { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 16, 8, 2 } },
- { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 8, 8, 1 } },
- { "default", { 32, 16, 8, 2 } },
- }
- },
- { // Intel GPUs
- kDeviceTypeGPU, "Intel", {
- { "Intel(R) HD Graphics 530", { 8, 8, 2, 1 } },
- { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 32, 16, 4, 1 } },
- { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 32, 16, 4, 1 } },
- { "Intel(R) HD Graphics IvyBridge M GT2", { 16, 8, 2, 1 } },
- { "Intel(R) HD Graphics Skylake ULT GT2", { 16, 8, 4, 8 } },
- { "Iris", { 16, 8, 1, 2 } },
- { "Iris Pro", { 32, 8, 4, 4 } },
- { "default", { 8, 8, 2, 1 } },
- }
- },
- { // Intel accelerators
- kDeviceTypeAccelerator, "Intel", {
- { "Intel(R) Many Integrated Core Acceleration Card", { 32, 8, 8, 1 } },
- { "default", { 32, 8, 8, 1 } },
- }
- },
- { // NVIDIA GPUs
- kDeviceTypeGPU, "NVIDIA", {
- { "GRID K520", { 16, 8, 4, 1 } },
- { "GeForce GT 650M", { 16, 16, 4, 2 } },
- { "GeForce GTX 1070", { 8, 16, 4, 1 } },
- { "GeForce GTX 1080", { 8, 32, 4, 1 } },
- { "GeForce GTX 480", { 8, 8, 4, 1 } },
- { "GeForce GTX 670", { 16, 32, 4, 1 } },
- { "GeForce GTX 680", { 32, 16, 4, 1 } },
- { "GeForce GTX 750", { 32, 8, 2, 2 } },
- { "GeForce GTX 750 Ti", { 16, 32, 2, 2 } },
- { "GeForce GTX 980", { 32, 16, 1, 1 } },
- { "GeForce GTX TITAN", { 32, 8, 2, 4 } },
- { "GeForce GTX TITAN Black", { 8, 32, 4, 8 } },
- { "GeForce GTX TITAN X", { 32, 8, 1, 2 } },
- { "TITAN X (Pascal)", { 8, 32, 4, 1 } },
- { "Tesla K20m", { 8, 8, 4, 4 } },
- { "Tesla K40m", { 8, 8, 4, 2 } },
- { "default", { 8, 32, 4, 1 } },
- }
- },
- { // QUALCOMM GPUs
- kDeviceTypeGPU, "QUALCOMM", {
- { "QUALCOMM Adreno(TM)", { 32, 8, 8, 1 } },
- { "default", { 32, 8, 8, 1 } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { 32, 8, 4, 4 } },
- }
- },
- }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry CopyComplexSingle = {
- "Copy", Precision::kComplexSingle, {"COPY_DIMX", "COPY_DIMY", "COPY_VW", "COPY_WPT"}, {
- { // AMD GPUs
- kDeviceTypeGPU, "AMD", {
- { "AMD Radeon R9 M370X Compute Engine", { 32, 8, 1, 1 } },
- { "ATI Radeon HD 6750M", { 8, 8, 1, 1 } },
- { "Ellesmere", { 16, 16, 1, 4 } },
- { "Fiji", { 16, 8, 1, 2 } },
- { "Hawaii", { 32, 8, 1, 2 } },
- { "Oland", { 8, 16, 1, 1 } },
- { "Pitcairn", { 8, 8, 1, 2 } },
- { "Tahiti", { 8, 8, 2, 2 } },
- { "Tonga", { 8, 32, 1, 2 } },
- { "Turks", { 32, 8, 4, 1 } },
- { "default", { 16, 8, 1, 1 } },
- }
- },
- { // Intel CPUs
- kDeviceTypeCPU, "Intel", {
- { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 16, 4, 2 } },
- { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 16, 16, 8, 1 } },
- { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 8, 4, 2 } },
- { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 8, 2, 2 } },
- { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 32, 4, 1 } },
- { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 16, 8, 8, 1 } },
- { "default", { 32, 8, 8, 1 } },
- }
- },
- { // Intel GPUs
- kDeviceTypeGPU, "Intel", {
- { "Intel(R) HD Graphics 530", { 16, 8, 2, 1 } },
- { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 16, 16, 2, 2 } },
- { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 8, 8, 1, 1 } },
- { "Intel(R) HD Graphics IvyBridge M GT2", { 8, 32, 2, 4 } },
- { "Intel(R) HD Graphics Skylake ULT GT2", { 8, 8, 2, 1 } },
- { "Iris", { 16, 8, 1, 2 } },
- { "Iris Pro", { 32, 16, 1, 4 } },
- { "default", { 16, 8, 1, 2 } },
- }
- },
- { // Intel accelerators
- kDeviceTypeAccelerator, "Intel", {
- { "Intel(R) Many Integrated Core Acceleration Card", { 32, 8, 4, 1 } },
- { "default", { 32, 8, 4, 1 } },
- }
- },
- { // NVIDIA GPUs
- kDeviceTypeGPU, "NVIDIA", {
- { "GRID K520", { 16, 8, 1, 1 } },
- { "GeForce GTX 1070", { 16, 8, 1, 1 } },
- { "GeForce GTX 1080", { 32, 8, 1, 2 } },
- { "GeForce GTX 480", { 16, 16, 1, 1 } },
- { "GeForce GTX 670", { 16, 8, 1, 1 } },
- { "GeForce GTX 750", { 16, 8, 1, 2 } },
- { "GeForce GTX 750 Ti", { 16, 32, 1, 1 } },
- { "GeForce GTX 980", { 8, 8, 1, 1 } },
- { "GeForce GTX TITAN Black", { 16, 8, 1, 1 } },
- { "GeForce GTX TITAN X", { 16, 8, 1, 1 } },
- { "TITAN X (Pascal)", { 8, 16, 2, 1 } },
- { "Tesla K20m", { 8, 8, 1, 4 } },
- { "Tesla K40m", { 16, 8, 1, 1 } },
- { "default", { 32, 8, 1, 1 } },
- }
- },
- { // QUALCOMM GPUs
- kDeviceTypeGPU, "QUALCOMM", {
- { "QUALCOMM Adreno(TM)", { 32, 8, 1, 1 } },
- { "default", { 32, 8, 1, 1 } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { 16, 8, 1, 2 } },
- }
- },
- }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry CopyDouble = {
- "Copy", Precision::kDouble, {"COPY_DIMX", "COPY_DIMY", "COPY_VW", "COPY_WPT"}, {
- { // AMD GPUs
- kDeviceTypeGPU, "AMD", {
- { "AMD Radeon R9 M370X Compute Engine", { 32, 8, 1, 1 } },
- { "Ellesmere", { 32, 8, 1, 4 } },
- { "Fiji", { 16, 8, 1, 2 } },
- { "Hawaii", { 32, 8, 1, 2 } },
- { "Oland", { 32, 8, 2, 8 } },
- { "Pitcairn", { 32, 8, 1, 1 } },
- { "Tahiti", { 8, 32, 2, 1 } },
- { "Tonga", { 8, 32, 2, 4 } },
- { "default", { 16, 8, 2, 1 } },
- }
- },
- { // ARM GPUs
- kDeviceTypeGPU, "ARM", {
- { "Mali-T628", { 16, 8, 8, 2 } },
- { "default", { 16, 8, 8, 2 } },
- }
- },
- { // Intel CPUs
- kDeviceTypeCPU, "Intel", {
- { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 16, 32, 8, 1 } },
- { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 16, 8, 8, 1 } },
- { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 16, 8, 1 } },
- { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 16, 32, 2, 1 } },
- { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 16, 32, 8, 1 } },
- { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 16, 16, 8, 1 } },
- { "default", { 16, 8, 8, 1 } },
- }
- },
- { // Intel accelerators
- kDeviceTypeAccelerator, "Intel", {
- { "Intel(R) Many Integrated Core Acceleration Card", { 8, 8, 8, 1 } },
- { "default", { 8, 8, 8, 1 } },
- }
- },
- { // NVIDIA GPUs
- kDeviceTypeGPU, "NVIDIA", {
- { "GRID K520", { 32, 16, 2, 1 } },
- { "GeForce GTX 1070", { 8, 8, 4, 1 } },
- { "GeForce GTX 1080", { 8, 8, 4, 1 } },
- { "GeForce GTX 480", { 8, 8, 2, 1 } },
- { "GeForce GTX 670", { 8, 8, 2, 1 } },
- { "GeForce GTX 680", { 16, 32, 2, 1 } },
- { "GeForce GTX 750", { 8, 16, 2, 1 } },
- { "GeForce GTX 750 Ti", { 16, 8, 2, 1 } },
- { "GeForce GTX 980", { 32, 8, 2, 1 } },
- { "GeForce GTX TITAN", { 16, 32, 2, 2 } },
- { "GeForce GTX TITAN Black", { 16, 8, 2, 8 } },
- { "GeForce GTX TITAN X", { 32, 16, 1, 1 } },
- { "TITAN X (Pascal)", { 8, 8, 2, 2 } },
- { "Tesla K20m", { 8, 8, 2, 1 } },
- { "Tesla K40m", { 8, 8, 2, 2 } },
- { "default", { 32, 32, 2, 1 } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { 16, 8, 2, 1 } },
- }
- },
- }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry CopyComplexDouble = {
- "Copy", Precision::kComplexDouble, {"COPY_DIMX", "COPY_DIMY", "COPY_VW", "COPY_WPT"}, {
- { // AMD GPUs
- kDeviceTypeGPU, "AMD", {
- { "AMD Radeon R9 M370X Compute Engine", { 8, 16, 1, 1 } },
- { "Ellesmere", { 8, 32, 1, 2 } },
- { "Fiji", { 8, 16, 1, 1 } },
- { "Hawaii", { 32, 8, 2, 8 } },
- { "Oland", { 8, 16, 1, 1 } },
- { "Pitcairn", { 16, 8, 1, 1 } },
- { "Tahiti", { 8, 16, 1, 1 } },
- { "Tonga", { 16, 8, 2, 1 } },
- { "default", { 8, 16, 1, 1 } },
- }
- },
- { // ARM GPUs
- kDeviceTypeGPU, "ARM", {
- { "Mali-T628", { 32, 8, 1, 2 } },
- { "default", { 32, 8, 1, 2 } },
- }
- },
- { // Intel CPUs
- kDeviceTypeCPU, "Intel", {
- { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 8, 8, 8, 1 } },
- { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 8, 8, 1 } },
- { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 16, 2, 1 } },
- { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 32, 8, 1 } },
- { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 16, 8, 4 } },
- { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 8, 8, 8, 1 } },
- { "default", { 16, 8, 8, 1 } },
- }
- },
- { // Intel accelerators
- kDeviceTypeAccelerator, "Intel", {
- { "Intel(R) Many Integrated Core Acceleration Card", { 32, 8, 8, 1 } },
- { "default", { 32, 8, 8, 1 } },
- }
- },
- { // NVIDIA GPUs
- kDeviceTypeGPU, "NVIDIA", {
- { "GRID K520", { 8, 8, 1, 1 } },
- { "GeForce GTX 1070", { 8, 32, 1, 4 } },
- { "GeForce GTX 1080", { 8, 8, 1, 1 } },
- { "GeForce GTX 480", { 16, 8, 1, 1 } },
- { "GeForce GTX 670", { 16, 8, 1, 1 } },
- { "GeForce GTX 680", { 8, 8, 1, 1 } },
- { "GeForce GTX 750", { 32, 8, 1, 1 } },
- { "GeForce GTX 750 Ti", { 16, 16, 1, 1 } },
- { "GeForce GTX 980", { 8, 8, 1, 1 } },
- { "GeForce GTX TITAN", { 16, 16, 1, 1 } },
- { "GeForce GTX TITAN Black", { 8, 8, 1, 2 } },
- { "GeForce GTX TITAN X", { 16, 8, 1, 1 } },
- { "TITAN X (Pascal)", { 8, 8, 1, 2 } },
- { "Tesla K20m", { 8, 8, 1, 2 } },
- { "Tesla K40m", { 8, 8, 1, 1 } },
- { "default", { 8, 8, 1, 1 } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { 16, 8, 1, 1 } },
- }
- },
- }
-};
-
-// =================================================================================================
-} // namespace database
-} // namespace clblast
diff --git a/src/database/kernels/copy/copy.hpp b/src/database/kernels/copy/copy.hpp
new file mode 100644
index 00000000..8c6e7e03
--- /dev/null
+++ b/src/database/kernels/copy/copy.hpp
@@ -0,0 +1,14 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Copy' kernels.
+//
+// =================================================================================================
+
+#include "database/kernels/copy/copy_16.hpp"
+#include "database/kernels/copy/copy_32.hpp"
+#include "database/kernels/copy/copy_3232.hpp"
+#include "database/kernels/copy/copy_64.hpp"
+#include "database/kernels/copy/copy_6464.hpp"
diff --git a/src/database/kernels/copy/copy_16.hpp b/src/database/kernels/copy/copy_16.hpp
new file mode 100644
index 00000000..dea61ca6
--- /dev/null
+++ b/src/database/kernels/copy/copy_16.hpp
@@ -0,0 +1,43 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Copy16' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry CopyHalf = {
+ "Copy", Precision::kHalf, {"COPY_DIMX", "COPY_DIMY", "COPY_VW", "COPY_WPT"}, {
+ { // AMD GPUs
+ kDeviceTypeGPU, "AMD", {
+ { "Ellesmere", { 16, 8, 4, 4 } },
+ { "default", { 16, 8, 4, 4 } },
+ }
+ },
+ { // Intel GPUs
+ kDeviceTypeGPU, "Intel", {
+ { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 8, 16, 8, 4 } },
+ { "Intel(R) HD Graphics Skylake ULT GT2", { 8, 32, 4, 8 } },
+ { "default", { 8, 32, 4, 8 } },
+ }
+ },
+ { // QUALCOMM GPUs
+ kDeviceTypeGPU, "QUALCOMM", {
+ { "QUALCOMM Adreno(TM)", { 32, 8, 8, 1 } },
+ { "default", { 32, 8, 8, 1 } },
+ }
+ },
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { 16, 8, 4, 4 } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/database/kernels/copy/copy_32.hpp b/src/database/kernels/copy/copy_32.hpp
new file mode 100644
index 00000000..254c2b38
--- /dev/null
+++ b/src/database/kernels/copy/copy_32.hpp
@@ -0,0 +1,101 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Copy32' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry CopySingle = {
+ "Copy", Precision::kSingle, {"COPY_DIMX", "COPY_DIMY", "COPY_VW", "COPY_WPT"}, {
+ { // AMD GPUs
+ kDeviceTypeGPU, "AMD", {
+ { "AMD Radeon R9 M370X Compute Engine", { 32, 8, 4, 1 } },
+ { "ATI Radeon HD 6750M", { 16, 8, 2, 1 } },
+ { "Ellesmere", { 8, 8, 4, 8 } },
+ { "Fiji", { 16, 16, 1, 2 } },
+ { "Hawaii", { 32, 8, 2, 2 } },
+ { "Oland", { 32, 8, 4, 2 } },
+ { "Pitcairn", { 8, 16, 4, 1 } },
+ { "Tahiti", { 32, 8, 2, 2 } },
+ { "Tonga", { 32, 8, 4, 4 } },
+ { "Turks", { 8, 8, 4, 2 } },
+ { "default", { 8, 16, 4, 1 } },
+ }
+ },
+ { // ARM GPUs
+ kDeviceTypeGPU, "ARM", {
+ { "Mali-T628", { 32, 8, 2, 4 } },
+ { "default", { 32, 8, 2, 4 } },
+ }
+ },
+ { // Intel CPUs
+ kDeviceTypeCPU, "Intel", {
+ { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 16, 8, 1 } },
+ { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 16, 8, 2 } },
+ { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 8, 4, 4 } },
+ { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 16, 8, 1 } },
+ { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 16, 8, 2 } },
+ { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 8, 8, 1 } },
+ { "default", { 32, 16, 8, 2 } },
+ }
+ },
+ { // Intel GPUs
+ kDeviceTypeGPU, "Intel", {
+ { "Intel(R) HD Graphics 530", { 8, 8, 2, 1 } },
+ { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 32, 16, 4, 1 } },
+ { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 32, 16, 4, 1 } },
+ { "Intel(R) HD Graphics IvyBridge M GT2", { 16, 8, 2, 1 } },
+ { "Intel(R) HD Graphics Skylake ULT GT2", { 16, 8, 4, 8 } },
+ { "Iris", { 16, 8, 1, 2 } },
+ { "Iris Pro", { 32, 8, 4, 4 } },
+ { "default", { 8, 8, 2, 1 } },
+ }
+ },
+ { // Intel accelerators
+ kDeviceTypeAccelerator, "Intel", {
+ { "Intel(R) Many Integrated Core Acceleration Card", { 32, 8, 8, 1 } },
+ { "default", { 32, 8, 8, 1 } },
+ }
+ },
+ { // NVIDIA GPUs
+ kDeviceTypeGPU, "NVIDIA", {
+ { "GRID K520", { 16, 8, 4, 1 } },
+ { "GeForce GT 650M", { 16, 16, 4, 2 } },
+ { "GeForce GTX 1070", { 8, 16, 4, 1 } },
+ { "GeForce GTX 1080", { 8, 32, 4, 1 } },
+ { "GeForce GTX 480", { 8, 8, 4, 1 } },
+ { "GeForce GTX 670", { 16, 32, 4, 1 } },
+ { "GeForce GTX 680", { 32, 16, 4, 1 } },
+ { "GeForce GTX 750", { 32, 8, 2, 2 } },
+ { "GeForce GTX 750 Ti", { 16, 32, 2, 2 } },
+ { "GeForce GTX 980", { 32, 16, 1, 1 } },
+ { "GeForce GTX TITAN", { 32, 8, 2, 4 } },
+ { "GeForce GTX TITAN Black", { 8, 32, 4, 8 } },
+ { "GeForce GTX TITAN X", { 32, 8, 1, 2 } },
+ { "TITAN X (Pascal)", { 8, 32, 4, 1 } },
+ { "Tesla K20m", { 8, 8, 4, 4 } },
+ { "Tesla K40m", { 8, 8, 4, 2 } },
+ { "default", { 8, 32, 4, 1 } },
+ }
+ },
+ { // QUALCOMM GPUs
+ kDeviceTypeGPU, "QUALCOMM", {
+ { "QUALCOMM Adreno(TM)", { 32, 8, 8, 1 } },
+ { "default", { 32, 8, 8, 1 } },
+ }
+ },
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { 32, 8, 4, 4 } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/database/kernels/copy/copy_3232.hpp b/src/database/kernels/copy/copy_3232.hpp
new file mode 100644
index 00000000..7af25017
--- /dev/null
+++ b/src/database/kernels/copy/copy_3232.hpp
@@ -0,0 +1,92 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Copy3232' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry CopyComplexSingle = {
+ "Copy", Precision::kComplexSingle, {"COPY_DIMX", "COPY_DIMY", "COPY_VW", "COPY_WPT"}, {
+ { // AMD GPUs
+ kDeviceTypeGPU, "AMD", {
+ { "AMD Radeon R9 M370X Compute Engine", { 32, 8, 1, 1 } },
+ { "ATI Radeon HD 6750M", { 8, 8, 1, 1 } },
+ { "Ellesmere", { 16, 16, 1, 4 } },
+ { "Fiji", { 16, 8, 1, 2 } },
+ { "Hawaii", { 32, 8, 1, 2 } },
+ { "Oland", { 8, 16, 1, 1 } },
+ { "Pitcairn", { 8, 8, 1, 2 } },
+ { "Tahiti", { 8, 8, 2, 2 } },
+ { "Tonga", { 8, 32, 1, 2 } },
+ { "Turks", { 32, 8, 4, 1 } },
+ { "default", { 16, 8, 1, 1 } },
+ }
+ },
+ { // Intel CPUs
+ kDeviceTypeCPU, "Intel", {
+ { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 16, 4, 2 } },
+ { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 16, 16, 8, 1 } },
+ { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 8, 4, 2 } },
+ { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 8, 2, 2 } },
+ { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 32, 4, 1 } },
+ { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 16, 8, 8, 1 } },
+ { "default", { 32, 8, 8, 1 } },
+ }
+ },
+ { // Intel GPUs
+ kDeviceTypeGPU, "Intel", {
+ { "Intel(R) HD Graphics 530", { 16, 8, 2, 1 } },
+ { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 16, 16, 2, 2 } },
+ { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 8, 8, 1, 1 } },
+ { "Intel(R) HD Graphics IvyBridge M GT2", { 8, 32, 2, 4 } },
+ { "Intel(R) HD Graphics Skylake ULT GT2", { 8, 8, 2, 1 } },
+ { "Iris", { 16, 8, 1, 2 } },
+ { "Iris Pro", { 32, 16, 1, 4 } },
+ { "default", { 16, 8, 1, 2 } },
+ }
+ },
+ { // Intel accelerators
+ kDeviceTypeAccelerator, "Intel", {
+ { "Intel(R) Many Integrated Core Acceleration Card", { 32, 8, 4, 1 } },
+ { "default", { 32, 8, 4, 1 } },
+ }
+ },
+ { // NVIDIA GPUs
+ kDeviceTypeGPU, "NVIDIA", {
+ { "GRID K520", { 16, 8, 1, 1 } },
+ { "GeForce GTX 1070", { 16, 8, 1, 1 } },
+ { "GeForce GTX 1080", { 32, 8, 1, 2 } },
+ { "GeForce GTX 480", { 16, 16, 1, 1 } },
+ { "GeForce GTX 670", { 16, 8, 1, 1 } },
+ { "GeForce GTX 750", { 16, 8, 1, 2 } },
+ { "GeForce GTX 750 Ti", { 16, 32, 1, 1 } },
+ { "GeForce GTX 980", { 8, 8, 1, 1 } },
+ { "GeForce GTX TITAN Black", { 16, 8, 1, 1 } },
+ { "GeForce GTX TITAN X", { 16, 8, 1, 1 } },
+ { "TITAN X (Pascal)", { 8, 16, 2, 1 } },
+ { "Tesla K20m", { 8, 8, 1, 4 } },
+ { "Tesla K40m", { 16, 8, 1, 1 } },
+ { "default", { 32, 8, 1, 1 } },
+ }
+ },
+ { // QUALCOMM GPUs
+ kDeviceTypeGPU, "QUALCOMM", {
+ { "QUALCOMM Adreno(TM)", { 32, 8, 1, 1 } },
+ { "default", { 32, 8, 1, 1 } },
+ }
+ },
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { 16, 8, 1, 2 } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/database/kernels/copy/copy_64.hpp b/src/database/kernels/copy/copy_64.hpp
new file mode 100644
index 00000000..5c00407b
--- /dev/null
+++ b/src/database/kernels/copy/copy_64.hpp
@@ -0,0 +1,80 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Copy64' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry CopyDouble = {
+ "Copy", Precision::kDouble, {"COPY_DIMX", "COPY_DIMY", "COPY_VW", "COPY_WPT"}, {
+ { // AMD GPUs
+ kDeviceTypeGPU, "AMD", {
+ { "AMD Radeon R9 M370X Compute Engine", { 32, 8, 1, 1 } },
+ { "Ellesmere", { 32, 8, 1, 4 } },
+ { "Fiji", { 16, 8, 1, 2 } },
+ { "Hawaii", { 32, 8, 1, 2 } },
+ { "Oland", { 32, 8, 2, 8 } },
+ { "Pitcairn", { 32, 8, 1, 1 } },
+ { "Tahiti", { 8, 32, 2, 1 } },
+ { "Tonga", { 8, 32, 2, 4 } },
+ { "default", { 16, 8, 2, 1 } },
+ }
+ },
+ { // ARM GPUs
+ kDeviceTypeGPU, "ARM", {
+ { "Mali-T628", { 16, 8, 8, 2 } },
+ { "default", { 16, 8, 8, 2 } },
+ }
+ },
+ { // Intel CPUs
+ kDeviceTypeCPU, "Intel", {
+ { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 16, 32, 8, 1 } },
+ { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 16, 8, 8, 1 } },
+ { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 16, 8, 1 } },
+ { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 16, 32, 2, 1 } },
+ { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 16, 32, 8, 1 } },
+ { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 16, 16, 8, 1 } },
+ { "default", { 16, 8, 8, 1 } },
+ }
+ },
+ { // Intel accelerators
+ kDeviceTypeAccelerator, "Intel", {
+ { "Intel(R) Many Integrated Core Acceleration Card", { 8, 8, 8, 1 } },
+ { "default", { 8, 8, 8, 1 } },
+ }
+ },
+ { // NVIDIA GPUs
+ kDeviceTypeGPU, "NVIDIA", {
+ { "GRID K520", { 32, 16, 2, 1 } },
+ { "GeForce GTX 1070", { 8, 8, 4, 1 } },
+ { "GeForce GTX 1080", { 8, 8, 4, 1 } },
+ { "GeForce GTX 480", { 8, 8, 2, 1 } },
+ { "GeForce GTX 670", { 8, 8, 2, 1 } },
+ { "GeForce GTX 680", { 16, 32, 2, 1 } },
+ { "GeForce GTX 750", { 8, 16, 2, 1 } },
+ { "GeForce GTX 750 Ti", { 16, 8, 2, 1 } },
+ { "GeForce GTX 980", { 32, 8, 2, 1 } },
+ { "GeForce GTX TITAN", { 16, 32, 2, 2 } },
+ { "GeForce GTX TITAN Black", { 16, 8, 2, 8 } },
+ { "GeForce GTX TITAN X", { 32, 16, 1, 1 } },
+ { "TITAN X (Pascal)", { 8, 8, 2, 2 } },
+ { "Tesla K20m", { 8, 8, 2, 1 } },
+ { "Tesla K40m", { 8, 8, 2, 2 } },
+ { "default", { 32, 32, 2, 1 } },
+ }
+ },
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { 16, 8, 2, 1 } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/database/kernels/copy/copy_6464.hpp b/src/database/kernels/copy/copy_6464.hpp
new file mode 100644
index 00000000..c7f74855
--- /dev/null
+++ b/src/database/kernels/copy/copy_6464.hpp
@@ -0,0 +1,80 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Copy6464' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry CopyComplexDouble = {
+ "Copy", Precision::kComplexDouble, {"COPY_DIMX", "COPY_DIMY", "COPY_VW", "COPY_WPT"}, {
+ { // AMD GPUs
+ kDeviceTypeGPU, "AMD", {
+ { "AMD Radeon R9 M370X Compute Engine", { 8, 16, 1, 1 } },
+ { "Ellesmere", { 8, 32, 1, 2 } },
+ { "Fiji", { 8, 16, 1, 1 } },
+ { "Hawaii", { 32, 8, 2, 8 } },
+ { "Oland", { 8, 16, 1, 1 } },
+ { "Pitcairn", { 16, 8, 1, 1 } },
+ { "Tahiti", { 8, 16, 1, 1 } },
+ { "Tonga", { 16, 8, 2, 1 } },
+ { "default", { 8, 16, 1, 1 } },
+ }
+ },
+ { // ARM GPUs
+ kDeviceTypeGPU, "ARM", {
+ { "Mali-T628", { 32, 8, 1, 2 } },
+ { "default", { 32, 8, 1, 2 } },
+ }
+ },
+ { // Intel CPUs
+ kDeviceTypeCPU, "Intel", {
+ { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 8, 8, 8, 1 } },
+ { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 8, 8, 1 } },
+ { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 16, 2, 1 } },
+ { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 32, 8, 1 } },
+ { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 16, 8, 4 } },
+ { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 8, 8, 8, 1 } },
+ { "default", { 16, 8, 8, 1 } },
+ }
+ },
+ { // Intel accelerators
+ kDeviceTypeAccelerator, "Intel", {
+ { "Intel(R) Many Integrated Core Acceleration Card", { 32, 8, 8, 1 } },
+ { "default", { 32, 8, 8, 1 } },
+ }
+ },
+ { // NVIDIA GPUs
+ kDeviceTypeGPU, "NVIDIA", {
+ { "GRID K520", { 8, 8, 1, 1 } },
+ { "GeForce GTX 1070", { 8, 32, 1, 4 } },
+ { "GeForce GTX 1080", { 8, 8, 1, 1 } },
+ { "GeForce GTX 480", { 16, 8, 1, 1 } },
+ { "GeForce GTX 670", { 16, 8, 1, 1 } },
+ { "GeForce GTX 680", { 8, 8, 1, 1 } },
+ { "GeForce GTX 750", { 32, 8, 1, 1 } },
+ { "GeForce GTX 750 Ti", { 16, 16, 1, 1 } },
+ { "GeForce GTX 980", { 8, 8, 1, 1 } },
+ { "GeForce GTX TITAN", { 16, 16, 1, 1 } },
+ { "GeForce GTX TITAN Black", { 8, 8, 1, 2 } },
+ { "GeForce GTX TITAN X", { 16, 8, 1, 1 } },
+ { "TITAN X (Pascal)", { 8, 8, 1, 2 } },
+ { "Tesla K20m", { 8, 8, 1, 2 } },
+ { "Tesla K40m", { 8, 8, 1, 1 } },
+ { "default", { 8, 8, 1, 1 } },
+ }
+ },
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { 16, 8, 1, 1 } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/database/kernels/invert.hpp b/src/database/kernels/invert.hpp
index 193d1ab4..e736c864 100644
--- a/src/database/kernels/invert.hpp
+++ b/src/database/kernels/invert.hpp
@@ -15,7 +15,7 @@ namespace clblast {
namespace database {
// =================================================================================================
-const Database::DatabaseEntry InvertHalf = {
+const DatabaseEntry InvertHalf = {
"Invert", Precision::kHalf, {"INTERNAL_BLOCK_SIZE"}, {
{ // Default
kDeviceTypeAll, "default", {
@@ -27,7 +27,7 @@ const Database::DatabaseEntry InvertHalf = {
// =================================================================================================
-const Database::DatabaseEntry InvertSingle = {
+const DatabaseEntry InvertSingle = {
"Invert", Precision::kSingle, {"INTERNAL_BLOCK_SIZE"}, {
{ // Default
kDeviceTypeAll, "default", {
@@ -39,7 +39,7 @@ const Database::DatabaseEntry InvertSingle = {
// =================================================================================================
-const Database::DatabaseEntry InvertComplexSingle = {
+const DatabaseEntry InvertComplexSingle = {
"Invert", Precision::kComplexSingle, {"INTERNAL_BLOCK_SIZE"}, {
{ // Default
kDeviceTypeAll, "default", {
@@ -51,7 +51,7 @@ const Database::DatabaseEntry InvertComplexSingle = {
// =================================================================================================
-const Database::DatabaseEntry InvertDouble = {
+const DatabaseEntry InvertDouble = {
"Invert", Precision::kDouble, {"INTERNAL_BLOCK_SIZE"}, {
{ // Default
kDeviceTypeAll, "default", {
@@ -63,7 +63,7 @@ const Database::DatabaseEntry InvertDouble = {
// =================================================================================================
-const Database::DatabaseEntry InvertComplexDouble = {
+const DatabaseEntry InvertComplexDouble = {
"Invert", Precision::kComplexDouble, {"INTERNAL_BLOCK_SIZE"}, {
{ // Default
kDeviceTypeAll, "default", {
diff --git a/src/database/kernels/pad.hpp b/src/database/kernels/pad.hpp
deleted file mode 100644
index b6ebde43..00000000
--- a/src/database/kernels/pad.hpp
+++ /dev/null
@@ -1,362 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-// Database generator <database.py>
-//
-// This file populates the database with best-found tuning parameters for the 'Pad' kernels.
-//
-// =================================================================================================
-
-namespace clblast {
-namespace database {
-// =================================================================================================
-
-const Database::DatabaseEntry PadHalf = {
- "Pad", Precision::kHalf, {"PAD_DIMX", "PAD_DIMY", "PAD_WPTX", "PAD_WPTY"}, {
- { // AMD GPUs
- kDeviceTypeGPU, "AMD", {
- { "Ellesmere", { 16, 8, 1, 2 } },
- { "default", { 16, 8, 1, 2 } },
- }
- },
- { // Intel GPUs
- kDeviceTypeGPU, "Intel", {
- { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 8, 8, 4, 1 } },
- { "Intel(R) HD Graphics Skylake ULT GT2", { 8, 32, 2, 2 } },
- { "default", { 8, 8, 2, 1 } },
- }
- },
- { // QUALCOMM GPUs
- kDeviceTypeGPU, "QUALCOMM", {
- { "QUALCOMM Adreno(TM)", { 16, 8, 4, 2 } },
- { "default", { 16, 8, 4, 2 } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { 8, 8, 4, 1 } },
- }
- },
- }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry PadSingle = {
- "Pad", Precision::kSingle, {"PAD_DIMX", "PAD_DIMY", "PAD_WPTX", "PAD_WPTY"}, {
- { // AMD GPUs
- kDeviceTypeGPU, "AMD", {
- { "AMD Radeon R9 M370X Compute Engine", { 32, 8, 1, 1 } },
- { "ATI Radeon HD 6750M", { 8, 16, 2, 1 } },
- { "Ellesmere", { 32, 8, 2, 2 } },
- { "Fiji", { 16, 16, 1, 2 } },
- { "Hawaii", { 32, 8, 1, 4 } },
- { "Oland", { 8, 8, 1, 2 } },
- { "Pitcairn", { 32, 8, 1, 2 } },
- { "Tahiti", { 32, 8, 1, 2 } },
- { "Tonga", { 16, 16, 2, 2 } },
- { "Turks", { 32, 8, 2, 1 } },
- { "default", { 8, 16, 1, 2 } },
- }
- },
- { // ARM GPUs
- kDeviceTypeGPU, "ARM", {
- { "Mali-T628", { 32, 8, 1, 4 } },
- { "default", { 32, 8, 1, 4 } },
- }
- },
- { // Intel CPUs
- kDeviceTypeCPU, "Intel", {
- { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 32, 4, 4 } },
- { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 16, 4, 1 } },
- { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 8, 2, 4 } },
- { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 16, 32, 4, 4 } },
- { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 16, 4, 4 } },
- { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 8, 4, 1 } },
- { "default", { 32, 8, 4, 2 } },
- }
- },
- { // Intel GPUs
- kDeviceTypeGPU, "Intel", {
- { "Intel(R) HD Graphics 530", { 32, 8, 2, 4 } },
- { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 32, 8, 2, 4 } },
- { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 16, 8, 1, 2 } },
- { "Intel(R) HD Graphics IvyBridge M GT2", { 16, 8, 4, 1 } },
- { "Intel(R) HD Graphics Skylake ULT GT2", { 32, 8, 4, 2 } },
- { "Iris", { 32, 16, 2, 1 } },
- { "Iris Pro", { 16, 8, 2, 1 } },
- { "default", { 32, 8, 4, 2 } },
- }
- },
- { // Intel accelerators
- kDeviceTypeAccelerator, "Intel", {
- { "Intel(R) Many Integrated Core Acceleration Card", { 32, 16, 2, 1 } },
- { "default", { 32, 16, 2, 1 } },
- }
- },
- { // NVIDIA GPUs
- kDeviceTypeGPU, "NVIDIA", {
- { "GRID K520", { 32, 8, 2, 1 } },
- { "GeForce GT 650M", { 32, 16, 2, 2 } },
- { "GeForce GTX 1070", { 16, 8, 1, 1 } },
- { "GeForce GTX 1080", { 16, 8, 1, 1 } },
- { "GeForce GTX 480", { 32, 8, 1, 4 } },
- { "GeForce GTX 670", { 32, 8, 2, 2 } },
- { "GeForce GTX 680", { 16, 8, 4, 1 } },
- { "GeForce GTX 750", { 32, 16, 4, 2 } },
- { "GeForce GTX 750 Ti", { 16, 8, 4, 1 } },
- { "GeForce GTX 980", { 16, 8, 1, 1 } },
- { "GeForce GTX TITAN", { 32, 8, 2, 1 } },
- { "GeForce GTX TITAN Black", { 32, 8, 1, 2 } },
- { "GeForce GTX TITAN X", { 16, 16, 1, 1 } },
- { "TITAN X (Pascal)", { 16, 8, 1, 2 } },
- { "Tesla K20m", { 32, 8, 2, 1 } },
- { "Tesla K40m", { 32, 8, 1, 1 } },
- { "default", { 32, 8, 4, 1 } },
- }
- },
- { // QUALCOMM GPUs
- kDeviceTypeGPU, "QUALCOMM", {
- { "QUALCOMM Adreno(TM)", { 32, 8, 2, 1 } },
- { "default", { 32, 8, 2, 1 } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { 32, 8, 2, 1 } },
- }
- },
- }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry PadComplexSingle = {
- "Pad", Precision::kComplexSingle, {"PAD_DIMX", "PAD_DIMY", "PAD_WPTX", "PAD_WPTY"}, {
- { // AMD GPUs
- kDeviceTypeGPU, "AMD", {
- { "AMD Radeon R9 M370X Compute Engine", { 32, 8, 1, 1 } },
- { "ATI Radeon HD 6750M", { 16, 8, 2, 1 } },
- { "Ellesmere", { 16, 16, 2, 4 } },
- { "Fiji", { 16, 8, 1, 2 } },
- { "Hawaii", { 32, 8, 1, 2 } },
- { "Oland", { 8, 32, 1, 1 } },
- { "Pitcairn", { 8, 8, 1, 2 } },
- { "Tahiti", { 16, 16, 1, 1 } },
- { "Tonga", { 16, 8, 1, 2 } },
- { "Turks", { 16, 8, 4, 4 } },
- { "default", { 16, 8, 1, 2 } },
- }
- },
- { // ARM GPUs
- kDeviceTypeGPU, "ARM", {
- { "Mali-T628", { 32, 8, 1, 4 } },
- { "default", { 32, 8, 1, 4 } },
- }
- },
- { // Intel CPUs
- kDeviceTypeCPU, "Intel", {
- { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 8, 4, 2 } },
- { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 8, 2, 2 } },
- { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 8, 1, 2 } },
- { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 32, 4, 1 } },
- { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 8, 2, 4 } },
- { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 16, 4, 1 } },
- { "default", { 32, 8, 4, 2 } },
- }
- },
- { // Intel GPUs
- kDeviceTypeGPU, "Intel", {
- { "Intel(R) HD Graphics 530", { 8, 8, 1, 2 } },
- { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 8, 8, 1, 1 } },
- { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 8, 8, 1, 1 } },
- { "Intel(R) HD Graphics IvyBridge M GT2", { 32, 8, 1, 1 } },
- { "Intel(R) HD Graphics Skylake ULT GT2", { 32, 8, 1, 1 } },
- { "Iris", { 32, 16, 2, 4 } },
- { "Iris Pro", { 32, 8, 2, 1 } },
- { "default", { 32, 8, 1, 4 } },
- }
- },
- { // Intel accelerators
- kDeviceTypeAccelerator, "Intel", {
- { "Intel(R) Many Integrated Core Acceleration Card", { 32, 8, 1, 1 } },
- { "default", { 32, 8, 1, 1 } },
- }
- },
- { // NVIDIA GPUs
- kDeviceTypeGPU, "NVIDIA", {
- { "GRID K520", { 16, 16, 1, 1 } },
- { "GeForce GTX 1070", { 8, 32, 1, 1 } },
- { "GeForce GTX 1080", { 32, 8, 1, 1 } },
- { "GeForce GTX 480", { 16, 8, 2, 1 } },
- { "GeForce GTX 670", { 16, 8, 1, 2 } },
- { "GeForce GTX 680", { 16, 32, 1, 2 } },
- { "GeForce GTX 750", { 32, 8, 2, 1 } },
- { "GeForce GTX 750 Ti", { 16, 8, 1, 1 } },
- { "GeForce GTX 980", { 16, 16, 1, 1 } },
- { "GeForce GTX TITAN", { 16, 8, 2, 1 } },
- { "GeForce GTX TITAN Black", { 16, 8, 1, 2 } },
- { "GeForce GTX TITAN X", { 16, 8, 1, 1 } },
- { "TITAN X (Pascal)", { 32, 32, 1, 2 } },
- { "Tesla K20m", { 32, 8, 1, 2 } },
- { "Tesla K40m", { 16, 8, 1, 1 } },
- { "default", { 32, 8, 1, 2 } },
- }
- },
- { // QUALCOMM GPUs
- kDeviceTypeGPU, "QUALCOMM", {
- { "QUALCOMM Adreno(TM)", { 32, 8, 4, 1 } },
- { "default", { 32, 8, 4, 1 } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { 32, 8, 1, 1 } },
- }
- },
- }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry PadDouble = {
- "Pad", Precision::kDouble, {"PAD_DIMX", "PAD_DIMY", "PAD_WPTX", "PAD_WPTY"}, {
- { // AMD GPUs
- kDeviceTypeGPU, "AMD", {
- { "AMD Radeon R9 M370X Compute Engine", { 32, 8, 1, 1 } },
- { "Ellesmere", { 8, 32, 2, 1 } },
- { "Fiji", { 8, 16, 1, 2 } },
- { "Hawaii", { 32, 8, 1, 2 } },
- { "Oland", { 8, 32, 1, 1 } },
- { "Pitcairn", { 8, 8, 1, 2 } },
- { "Tahiti", { 32, 8, 1, 1 } },
- { "Tonga", { 32, 8, 4, 1 } },
- { "default", { 16, 16, 1, 1 } },
- }
- },
- { // ARM GPUs
- kDeviceTypeGPU, "ARM", {
- { "Mali-T628", { 32, 8, 4, 2 } },
- { "default", { 32, 8, 4, 2 } },
- }
- },
- { // Intel CPUs
- kDeviceTypeCPU, "Intel", {
- { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 8, 4, 2 } },
- { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 8, 4, 1 } },
- { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 16, 2, 2 } },
- { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 32, 4, 1 } },
- { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 32, 4, 1 } },
- { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 8, 2, 1 } },
- { "default", { 32, 16, 4, 1 } },
- }
- },
- { // Intel accelerators
- kDeviceTypeAccelerator, "Intel", {
- { "Intel(R) Many Integrated Core Acceleration Card", { 32, 8, 1, 1 } },
- { "default", { 32, 8, 1, 1 } },
- }
- },
- { // NVIDIA GPUs
- kDeviceTypeGPU, "NVIDIA", {
- { "GRID K520", { 32, 8, 1, 1 } },
- { "GeForce GTX 1070", { 8, 8, 1, 1 } },
- { "GeForce GTX 1080", { 32, 32, 2, 1 } },
- { "GeForce GTX 480", { 16, 8, 1, 1 } },
- { "GeForce GTX 670", { 16, 16, 2, 1 } },
- { "GeForce GTX 680", { 32, 32, 1, 2 } },
- { "GeForce GTX 750", { 32, 16, 1, 1 } },
- { "GeForce GTX 750 Ti", { 8, 16, 1, 1 } },
- { "GeForce GTX 980", { 8, 16, 1, 1 } },
- { "GeForce GTX TITAN", { 32, 8, 1, 1 } },
- { "GeForce GTX TITAN Black", { 16, 8, 1, 1 } },
- { "GeForce GTX TITAN X", { 16, 8, 1, 1 } },
- { "TITAN X (Pascal)", { 8, 32, 4, 1 } },
- { "Tesla K20m", { 32, 8, 1, 1 } },
- { "Tesla K40m", { 16, 8, 1, 2 } },
- { "default", { 32, 8, 1, 1 } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { 32, 8, 1, 1 } },
- }
- },
- }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry PadComplexDouble = {
- "Pad", Precision::kComplexDouble, {"PAD_DIMX", "PAD_DIMY", "PAD_WPTX", "PAD_WPTY"}, {
- { // AMD GPUs
- kDeviceTypeGPU, "AMD", {
- { "AMD Radeon R9 M370X Compute Engine", { 16, 8, 1, 1 } },
- { "Ellesmere", { 8, 16, 1, 2 } },
- { "Fiji", { 32, 8, 2, 1 } },
- { "Hawaii", { 32, 8, 1, 1 } },
- { "Oland", { 8, 16, 2, 1 } },
- { "Pitcairn", { 16, 8, 1, 1 } },
- { "Tahiti", { 8, 16, 1, 1 } },
- { "Tonga", { 8, 16, 1, 1 } },
- { "default", { 8, 16, 1, 1 } },
- }
- },
- { // ARM GPUs
- kDeviceTypeGPU, "ARM", {
- { "Mali-T628", { 16, 8, 4, 1 } },
- { "default", { 16, 8, 4, 1 } },
- }
- },
- { // Intel CPUs
- kDeviceTypeCPU, "Intel", {
- { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 16, 16, 4, 1 } },
- { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 8, 2, 1 } },
- { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 8, 2, 2 } },
- { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 16, 32, 4, 1 } },
- { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 32, 2, 2 } },
- { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 8, 2, 1 } },
- { "default", { 32, 8, 2, 2 } },
- }
- },
- { // Intel accelerators
- kDeviceTypeAccelerator, "Intel", {
- { "Intel(R) Many Integrated Core Acceleration Card", { 32, 8, 4, 1 } },
- { "default", { 32, 8, 4, 1 } },
- }
- },
- { // NVIDIA GPUs
- kDeviceTypeGPU, "NVIDIA", {
- { "GRID K520", { 8, 8, 1, 1 } },
- { "GeForce GTX 1070", { 8, 8, 2, 2 } },
- { "GeForce GTX 1080", { 8, 8, 1, 1 } },
- { "GeForce GTX 480", { 16, 8, 1, 1 } },
- { "GeForce GTX 670", { 32, 8, 1, 1 } },
- { "GeForce GTX 680", { 8, 8, 1, 1 } },
- { "GeForce GTX 750", { 8, 8, 1, 1 } },
- { "GeForce GTX 750 Ti", { 16, 32, 1, 1 } },
- { "GeForce GTX 980", { 16, 16, 1, 1 } },
- { "GeForce GTX TITAN", { 8, 32, 1, 2 } },
- { "GeForce GTX TITAN Black", { 16, 8, 1, 4 } },
- { "GeForce GTX TITAN X", { 16, 8, 1, 1 } },
- { "TITAN X (Pascal)", { 8, 16, 1, 1 } },
- { "Tesla K20m", { 8, 8, 1, 2 } },
- { "Tesla K40m", { 8, 8, 1, 1 } },
- { "default", { 16, 8, 1, 1 } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { 32, 8, 1, 1 } },
- }
- },
- }
-};
-
-// =================================================================================================
-} // namespace database
-} // namespace clblast
diff --git a/src/database/kernels/pad/pad.hpp b/src/database/kernels/pad/pad.hpp
new file mode 100644
index 00000000..bc91c09f
--- /dev/null
+++ b/src/database/kernels/pad/pad.hpp
@@ -0,0 +1,14 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Pad' kernels.
+//
+// =================================================================================================
+
+#include "database/kernels/pad/pad_16.hpp"
+#include "database/kernels/pad/pad_32.hpp"
+#include "database/kernels/pad/pad_3232.hpp"
+#include "database/kernels/pad/pad_64.hpp"
+#include "database/kernels/pad/pad_6464.hpp"
diff --git a/src/database/kernels/pad/pad_16.hpp b/src/database/kernels/pad/pad_16.hpp
new file mode 100644
index 00000000..8f31c31e
--- /dev/null
+++ b/src/database/kernels/pad/pad_16.hpp
@@ -0,0 +1,43 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Pad16' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry PadHalf = {
+ "Pad", Precision::kHalf, {"PAD_DIMX", "PAD_DIMY", "PAD_WPTX", "PAD_WPTY"}, {
+ { // AMD GPUs
+ kDeviceTypeGPU, "AMD", {
+ { "Ellesmere", { 16, 8, 1, 2 } },
+ { "default", { 16, 8, 1, 2 } },
+ }
+ },
+ { // Intel GPUs
+ kDeviceTypeGPU, "Intel", {
+ { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 8, 8, 4, 1 } },
+ { "Intel(R) HD Graphics Skylake ULT GT2", { 8, 32, 2, 2 } },
+ { "default", { 8, 8, 2, 1 } },
+ }
+ },
+ { // QUALCOMM GPUs
+ kDeviceTypeGPU, "QUALCOMM", {
+ { "QUALCOMM Adreno(TM)", { 16, 8, 4, 2 } },
+ { "default", { 16, 8, 4, 2 } },
+ }
+ },
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { 8, 8, 4, 1 } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/database/kernels/pad/pad_32.hpp b/src/database/kernels/pad/pad_32.hpp
new file mode 100644
index 00000000..eda85e8b
--- /dev/null
+++ b/src/database/kernels/pad/pad_32.hpp
@@ -0,0 +1,101 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Pad32' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry PadSingle = {
+ "Pad", Precision::kSingle, {"PAD_DIMX", "PAD_DIMY", "PAD_WPTX", "PAD_WPTY"}, {
+ { // AMD GPUs
+ kDeviceTypeGPU, "AMD", {
+ { "AMD Radeon R9 M370X Compute Engine", { 32, 8, 1, 1 } },
+ { "ATI Radeon HD 6750M", { 8, 16, 2, 1 } },
+ { "Ellesmere", { 32, 8, 2, 2 } },
+ { "Fiji", { 16, 16, 1, 2 } },
+ { "Hawaii", { 32, 8, 1, 4 } },
+ { "Oland", { 8, 8, 1, 2 } },
+ { "Pitcairn", { 32, 8, 1, 2 } },
+ { "Tahiti", { 32, 8, 1, 2 } },
+ { "Tonga", { 16, 16, 2, 2 } },
+ { "Turks", { 32, 8, 2, 1 } },
+ { "default", { 8, 16, 1, 2 } },
+ }
+ },
+ { // ARM GPUs
+ kDeviceTypeGPU, "ARM", {
+ { "Mali-T628", { 32, 8, 1, 4 } },
+ { "default", { 32, 8, 1, 4 } },
+ }
+ },
+ { // Intel CPUs
+ kDeviceTypeCPU, "Intel", {
+ { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 32, 4, 4 } },
+ { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 16, 4, 1 } },
+ { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 8, 2, 4 } },
+ { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 16, 32, 4, 4 } },
+ { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 16, 4, 4 } },
+ { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 8, 4, 1 } },
+ { "default", { 32, 8, 4, 2 } },
+ }
+ },
+ { // Intel GPUs
+ kDeviceTypeGPU, "Intel", {
+ { "Intel(R) HD Graphics 530", { 32, 8, 2, 4 } },
+ { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 32, 8, 2, 4 } },
+ { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 16, 8, 1, 2 } },
+ { "Intel(R) HD Graphics IvyBridge M GT2", { 16, 8, 4, 1 } },
+ { "Intel(R) HD Graphics Skylake ULT GT2", { 32, 8, 4, 2 } },
+ { "Iris", { 32, 16, 2, 1 } },
+ { "Iris Pro", { 16, 8, 2, 1 } },
+ { "default", { 32, 8, 4, 2 } },
+ }
+ },
+ { // Intel accelerators
+ kDeviceTypeAccelerator, "Intel", {
+ { "Intel(R) Many Integrated Core Acceleration Card", { 32, 16, 2, 1 } },
+ { "default", { 32, 16, 2, 1 } },
+ }
+ },
+ { // NVIDIA GPUs
+ kDeviceTypeGPU, "NVIDIA", {
+ { "GRID K520", { 32, 8, 2, 1 } },
+ { "GeForce GT 650M", { 32, 16, 2, 2 } },
+ { "GeForce GTX 1070", { 16, 8, 1, 1 } },
+ { "GeForce GTX 1080", { 16, 8, 1, 1 } },
+ { "GeForce GTX 480", { 32, 8, 1, 4 } },
+ { "GeForce GTX 670", { 32, 8, 2, 2 } },
+ { "GeForce GTX 680", { 16, 8, 4, 1 } },
+ { "GeForce GTX 750", { 32, 16, 4, 2 } },
+ { "GeForce GTX 750 Ti", { 16, 8, 4, 1 } },
+ { "GeForce GTX 980", { 16, 8, 1, 1 } },
+ { "GeForce GTX TITAN", { 32, 8, 2, 1 } },
+ { "GeForce GTX TITAN Black", { 32, 8, 1, 2 } },
+ { "GeForce GTX TITAN X", { 16, 16, 1, 1 } },
+ { "TITAN X (Pascal)", { 16, 8, 1, 2 } },
+ { "Tesla K20m", { 32, 8, 2, 1 } },
+ { "Tesla K40m", { 32, 8, 1, 1 } },
+ { "default", { 32, 8, 4, 1 } },
+ }
+ },
+ { // QUALCOMM GPUs
+ kDeviceTypeGPU, "QUALCOMM", {
+ { "QUALCOMM Adreno(TM)", { 32, 8, 2, 1 } },
+ { "default", { 32, 8, 2, 1 } },
+ }
+ },
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { 32, 8, 2, 1 } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/database/kernels/pad/pad_3232.hpp b/src/database/kernels/pad/pad_3232.hpp
new file mode 100644
index 00000000..bc6ee662
--- /dev/null
+++ b/src/database/kernels/pad/pad_3232.hpp
@@ -0,0 +1,100 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Pad3232' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry PadComplexSingle = {
+ "Pad", Precision::kComplexSingle, {"PAD_DIMX", "PAD_DIMY", "PAD_WPTX", "PAD_WPTY"}, {
+ { // AMD GPUs
+ kDeviceTypeGPU, "AMD", {
+ { "AMD Radeon R9 M370X Compute Engine", { 32, 8, 1, 1 } },
+ { "ATI Radeon HD 6750M", { 16, 8, 2, 1 } },
+ { "Ellesmere", { 16, 16, 2, 4 } },
+ { "Fiji", { 16, 8, 1, 2 } },
+ { "Hawaii", { 32, 8, 1, 2 } },
+ { "Oland", { 8, 32, 1, 1 } },
+ { "Pitcairn", { 8, 8, 1, 2 } },
+ { "Tahiti", { 16, 16, 1, 1 } },
+ { "Tonga", { 16, 8, 1, 2 } },
+ { "Turks", { 16, 8, 4, 4 } },
+ { "default", { 16, 8, 1, 2 } },
+ }
+ },
+ { // ARM GPUs
+ kDeviceTypeGPU, "ARM", {
+ { "Mali-T628", { 32, 8, 1, 4 } },
+ { "default", { 32, 8, 1, 4 } },
+ }
+ },
+ { // Intel CPUs
+ kDeviceTypeCPU, "Intel", {
+ { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 8, 4, 2 } },
+ { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 8, 2, 2 } },
+ { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 8, 1, 2 } },
+ { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 32, 4, 1 } },
+ { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 8, 2, 4 } },
+ { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 16, 4, 1 } },
+ { "default", { 32, 8, 4, 2 } },
+ }
+ },
+ { // Intel GPUs
+ kDeviceTypeGPU, "Intel", {
+ { "Intel(R) HD Graphics 530", { 8, 8, 1, 2 } },
+ { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 8, 8, 1, 1 } },
+ { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 8, 8, 1, 1 } },
+ { "Intel(R) HD Graphics IvyBridge M GT2", { 32, 8, 1, 1 } },
+ { "Intel(R) HD Graphics Skylake ULT GT2", { 32, 8, 1, 1 } },
+ { "Iris", { 32, 16, 2, 4 } },
+ { "Iris Pro", { 32, 8, 2, 1 } },
+ { "default", { 32, 8, 1, 4 } },
+ }
+ },
+ { // Intel accelerators
+ kDeviceTypeAccelerator, "Intel", {
+ { "Intel(R) Many Integrated Core Acceleration Card", { 32, 8, 1, 1 } },
+ { "default", { 32, 8, 1, 1 } },
+ }
+ },
+ { // NVIDIA GPUs
+ kDeviceTypeGPU, "NVIDIA", {
+ { "GRID K520", { 16, 16, 1, 1 } },
+ { "GeForce GTX 1070", { 8, 32, 1, 1 } },
+ { "GeForce GTX 1080", { 32, 8, 1, 1 } },
+ { "GeForce GTX 480", { 16, 8, 2, 1 } },
+ { "GeForce GTX 670", { 16, 8, 1, 2 } },
+ { "GeForce GTX 680", { 16, 32, 1, 2 } },
+ { "GeForce GTX 750", { 32, 8, 2, 1 } },
+ { "GeForce GTX 750 Ti", { 16, 8, 1, 1 } },
+ { "GeForce GTX 980", { 16, 16, 1, 1 } },
+ { "GeForce GTX TITAN", { 16, 8, 2, 1 } },
+ { "GeForce GTX TITAN Black", { 16, 8, 1, 2 } },
+ { "GeForce GTX TITAN X", { 16, 8, 1, 1 } },
+ { "TITAN X (Pascal)", { 32, 32, 1, 2 } },
+ { "Tesla K20m", { 32, 8, 1, 2 } },
+ { "Tesla K40m", { 16, 8, 1, 1 } },
+ { "default", { 32, 8, 1, 2 } },
+ }
+ },
+ { // QUALCOMM GPUs
+ kDeviceTypeGPU, "QUALCOMM", {
+ { "QUALCOMM Adreno(TM)", { 32, 8, 4, 1 } },
+ { "default", { 32, 8, 4, 1 } },
+ }
+ },
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { 32, 8, 1, 1 } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/database/kernels/pad/pad_64.hpp b/src/database/kernels/pad/pad_64.hpp
new file mode 100644
index 00000000..94008efe
--- /dev/null
+++ b/src/database/kernels/pad/pad_64.hpp
@@ -0,0 +1,80 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Pad64' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry PadDouble = {
+ "Pad", Precision::kDouble, {"PAD_DIMX", "PAD_DIMY", "PAD_WPTX", "PAD_WPTY"}, {
+ { // AMD GPUs
+ kDeviceTypeGPU, "AMD", {
+ { "AMD Radeon R9 M370X Compute Engine", { 32, 8, 1, 1 } },
+ { "Ellesmere", { 8, 32, 2, 1 } },
+ { "Fiji", { 8, 16, 1, 2 } },
+ { "Hawaii", { 32, 8, 1, 2 } },
+ { "Oland", { 8, 32, 1, 1 } },
+ { "Pitcairn", { 8, 8, 1, 2 } },
+ { "Tahiti", { 32, 8, 1, 1 } },
+ { "Tonga", { 32, 8, 4, 1 } },
+ { "default", { 16, 16, 1, 1 } },
+ }
+ },
+ { // ARM GPUs
+ kDeviceTypeGPU, "ARM", {
+ { "Mali-T628", { 32, 8, 4, 2 } },
+ { "default", { 32, 8, 4, 2 } },
+ }
+ },
+ { // Intel CPUs
+ kDeviceTypeCPU, "Intel", {
+ { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 8, 4, 2 } },
+ { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 8, 4, 1 } },
+ { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 16, 2, 2 } },
+ { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 32, 4, 1 } },
+ { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 32, 4, 1 } },
+ { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 8, 2, 1 } },
+ { "default", { 32, 16, 4, 1 } },
+ }
+ },
+ { // Intel accelerators
+ kDeviceTypeAccelerator, "Intel", {
+ { "Intel(R) Many Integrated Core Acceleration Card", { 32, 8, 1, 1 } },
+ { "default", { 32, 8, 1, 1 } },
+ }
+ },
+ { // NVIDIA GPUs
+ kDeviceTypeGPU, "NVIDIA", {
+ { "GRID K520", { 32, 8, 1, 1 } },
+ { "GeForce GTX 1070", { 8, 8, 1, 1 } },
+ { "GeForce GTX 1080", { 32, 32, 2, 1 } },
+ { "GeForce GTX 480", { 16, 8, 1, 1 } },
+ { "GeForce GTX 670", { 16, 16, 2, 1 } },
+ { "GeForce GTX 680", { 32, 32, 1, 2 } },
+ { "GeForce GTX 750", { 32, 16, 1, 1 } },
+ { "GeForce GTX 750 Ti", { 8, 16, 1, 1 } },
+ { "GeForce GTX 980", { 8, 16, 1, 1 } },
+ { "GeForce GTX TITAN", { 32, 8, 1, 1 } },
+ { "GeForce GTX TITAN Black", { 16, 8, 1, 1 } },
+ { "GeForce GTX TITAN X", { 16, 8, 1, 1 } },
+ { "TITAN X (Pascal)", { 8, 32, 4, 1 } },
+ { "Tesla K20m", { 32, 8, 1, 1 } },
+ { "Tesla K40m", { 16, 8, 1, 2 } },
+ { "default", { 32, 8, 1, 1 } },
+ }
+ },
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { 32, 8, 1, 1 } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/database/kernels/pad/pad_6464.hpp b/src/database/kernels/pad/pad_6464.hpp
new file mode 100644
index 00000000..43c5a8e0
--- /dev/null
+++ b/src/database/kernels/pad/pad_6464.hpp
@@ -0,0 +1,80 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Pad6464' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry PadComplexDouble = {
+ "Pad", Precision::kComplexDouble, {"PAD_DIMX", "PAD_DIMY", "PAD_WPTX", "PAD_WPTY"}, {
+ { // AMD GPUs
+ kDeviceTypeGPU, "AMD", {
+ { "AMD Radeon R9 M370X Compute Engine", { 16, 8, 1, 1 } },
+ { "Ellesmere", { 8, 16, 1, 2 } },
+ { "Fiji", { 32, 8, 2, 1 } },
+ { "Hawaii", { 32, 8, 1, 1 } },
+ { "Oland", { 8, 16, 2, 1 } },
+ { "Pitcairn", { 16, 8, 1, 1 } },
+ { "Tahiti", { 8, 16, 1, 1 } },
+ { "Tonga", { 8, 16, 1, 1 } },
+ { "default", { 8, 16, 1, 1 } },
+ }
+ },
+ { // ARM GPUs
+ kDeviceTypeGPU, "ARM", {
+ { "Mali-T628", { 16, 8, 4, 1 } },
+ { "default", { 16, 8, 4, 1 } },
+ }
+ },
+ { // Intel CPUs
+ kDeviceTypeCPU, "Intel", {
+ { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 16, 16, 4, 1 } },
+ { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 8, 2, 1 } },
+ { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 8, 2, 2 } },
+ { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 16, 32, 4, 1 } },
+ { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 32, 2, 2 } },
+ { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 8, 2, 1 } },
+ { "default", { 32, 8, 2, 2 } },
+ }
+ },
+ { // Intel accelerators
+ kDeviceTypeAccelerator, "Intel", {
+ { "Intel(R) Many Integrated Core Acceleration Card", { 32, 8, 4, 1 } },
+ { "default", { 32, 8, 4, 1 } },
+ }
+ },
+ { // NVIDIA GPUs
+ kDeviceTypeGPU, "NVIDIA", {
+ { "GRID K520", { 8, 8, 1, 1 } },
+ { "GeForce GTX 1070", { 8, 8, 2, 2 } },
+ { "GeForce GTX 1080", { 8, 8, 1, 1 } },
+ { "GeForce GTX 480", { 16, 8, 1, 1 } },
+ { "GeForce GTX 670", { 32, 8, 1, 1 } },
+ { "GeForce GTX 680", { 8, 8, 1, 1 } },
+ { "GeForce GTX 750", { 8, 8, 1, 1 } },
+ { "GeForce GTX 750 Ti", { 16, 32, 1, 1 } },
+ { "GeForce GTX 980", { 16, 16, 1, 1 } },
+ { "GeForce GTX TITAN", { 8, 32, 1, 2 } },
+ { "GeForce GTX TITAN Black", { 16, 8, 1, 4 } },
+ { "GeForce GTX TITAN X", { 16, 8, 1, 1 } },
+ { "TITAN X (Pascal)", { 8, 16, 1, 1 } },
+ { "Tesla K20m", { 8, 8, 1, 2 } },
+ { "Tesla K40m", { 8, 8, 1, 1 } },
+ { "default", { 16, 8, 1, 1 } },
+ }
+ },
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { 32, 8, 1, 1 } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/database/kernels/padtranspose.hpp b/src/database/kernels/padtranspose.hpp
deleted file mode 100644
index bbda5c65..00000000
--- a/src/database/kernels/padtranspose.hpp
+++ /dev/null
@@ -1,361 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-// Database generator <database.py>
-//
-// This file populates the database with best-found tuning parameters for the 'Padtranspose' kernels.
-//
-// =================================================================================================
-
-namespace clblast {
-namespace database {
-// =================================================================================================
-
-const Database::DatabaseEntry PadtransposeHalf = {
- "Padtranspose", Precision::kHalf, {"PADTRA_PAD", "PADTRA_TILE", "PADTRA_WPT"}, {
- { // AMD GPUs
- kDeviceTypeGPU, "AMD", {
- { "Ellesmere", { 0, 16, 4 } },
- { "default", { 0, 16, 4 } },
- }
- },
- { // Intel GPUs
- kDeviceTypeGPU, "Intel", {
- { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 0, 8, 1 } },
- { "Intel(R) HD Graphics Skylake ULT GT2", { 1, 8, 2 } },
- { "default", { 0, 8, 1 } },
- }
- },
- { // QUALCOMM GPUs
- kDeviceTypeGPU, "QUALCOMM", {
- { "QUALCOMM Adreno(TM)", { 0, 8, 8 } },
- { "default", { 0, 8, 8 } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { 0, 8, 1 } },
- }
- },
- }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry PadtransposeSingle = {
- "Padtranspose", Precision::kSingle, {"PADTRA_PAD", "PADTRA_TILE", "PADTRA_WPT"}, {
- { // AMD GPUs
- kDeviceTypeGPU, "AMD", {
- { "AMD Radeon R9 M370X Compute Engine", { 0, 16, 4 } },
- { "ATI Radeon HD 6750M", { 1, 16, 1 } },
- { "Ellesmere", { 1, 8, 4 } },
- { "Fiji", { 0, 16, 2 } },
- { "Hawaii", { 1, 16, 4 } },
- { "Oland", { 0, 16, 4 } },
- { "Pitcairn", { 0, 16, 4 } },
- { "Tahiti", { 0, 16, 4 } },
- { "Tonga", { 0, 16, 2 } },
- { "Turks", { 1, 16, 1 } },
- { "default", { 0, 16, 4 } },
- }
- },
- { // ARM GPUs
- kDeviceTypeGPU, "ARM", {
- { "Mali-T628", { 0, 8, 2 } },
- { "default", { 0, 8, 2 } },
- }
- },
- { // Intel CPUs
- kDeviceTypeCPU, "Intel", {
- { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 0, 8, 8 } },
- { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 0, 16, 1 } },
- { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 0, 32, 1 } },
- { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 0, 8, 8 } },
- { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 0, 8, 8 } },
- { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 0, 32, 1 } },
- { "default", { 0, 8, 8 } },
- }
- },
- { // Intel GPUs
- kDeviceTypeGPU, "Intel", {
- { "Intel(R) HD Graphics 530", { 1, 16, 2 } },
- { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 0, 16, 4 } },
- { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 1, 16, 2 } },
- { "Intel(R) HD Graphics IvyBridge M GT2", { 0, 16, 4 } },
- { "Intel(R) HD Graphics Skylake ULT GT2", { 1, 16, 2 } },
- { "Iris", { 1, 16, 2 } },
- { "Iris Pro", { 1, 16, 2 } },
- { "default", { 1, 16, 2 } },
- }
- },
- { // Intel accelerators
- kDeviceTypeAccelerator, "Intel", {
- { "Intel(R) Many Integrated Core Acceleration Card", { 0, 16, 2 } },
- { "default", { 0, 16, 2 } },
- }
- },
- { // NVIDIA GPUs
- kDeviceTypeGPU, "NVIDIA", {
- { "GRID K520", { 1, 32, 2 } },
- { "GeForce GTX 1070", { 0, 16, 1 } },
- { "GeForce GTX 1080", { 1, 16, 2 } },
- { "GeForce GTX 480", { 1, 16, 2 } },
- { "GeForce GTX 670", { 1, 32, 2 } },
- { "GeForce GTX 680", { 1, 16, 2 } },
- { "GeForce GTX 750", { 1, 32, 2 } },
- { "GeForce GTX 750 Ti", { 1, 32, 2 } },
- { "GeForce GTX 980", { 0, 16, 1 } },
- { "GeForce GTX TITAN", { 1, 16, 2 } },
- { "GeForce GTX TITAN Black", { 1, 32, 2 } },
- { "GeForce GTX TITAN X", { 1, 32, 1 } },
- { "TITAN X (Pascal)", { 1, 16, 2 } },
- { "Tesla K20m", { 1, 16, 2 } },
- { "Tesla K40m", { 1, 32, 2 } },
- { "default", { 1, 32, 2 } },
- }
- },
- { // QUALCOMM GPUs
- kDeviceTypeGPU, "QUALCOMM", {
- { "QUALCOMM Adreno(TM)", { 0, 8, 2 } },
- { "default", { 0, 8, 2 } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { 1, 16, 2 } },
- }
- },
- }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry PadtransposeComplexSingle = {
- "Padtranspose", Precision::kComplexSingle, {"PADTRA_PAD", "PADTRA_TILE", "PADTRA_WPT"}, {
- { // AMD GPUs
- kDeviceTypeGPU, "AMD", {
- { "AMD Radeon R9 M370X Compute Engine", { 0, 16, 4 } },
- { "ATI Radeon HD 6750M", { 1, 16, 1 } },
- { "Ellesmere", { 0, 8, 4 } },
- { "Fiji", { 1, 16, 2 } },
- { "Hawaii", { 0, 16, 2 } },
- { "Oland", { 0, 8, 4 } },
- { "Pitcairn", { 0, 8, 4 } },
- { "Tahiti", { 0, 16, 2 } },
- { "Tonga", { 0, 16, 2 } },
- { "Turks", { 0, 16, 4 } },
- { "default", { 0, 8, 4 } },
- }
- },
- { // ARM GPUs
- kDeviceTypeGPU, "ARM", {
- { "Mali-T628", { 1, 16, 2 } },
- { "default", { 1, 16, 2 } },
- }
- },
- { // Intel CPUs
- kDeviceTypeCPU, "Intel", {
- { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 0, 8, 8 } },
- { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1, 8, 4 } },
- { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 0, 8, 4 } },
- { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 0, 8, 8 } },
- { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 0, 8, 8 } },
- { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 0, 8, 4 } },
- { "default", { 0, 8, 8 } },
- }
- },
- { // Intel GPUs
- kDeviceTypeGPU, "Intel", {
- { "Intel(R) HD Graphics 530", { 1, 16, 2 } },
- { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 0, 16, 2 } },
- { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 1, 16, 2 } },
- { "Intel(R) HD Graphics IvyBridge M GT2", { 0, 16, 2 } },
- { "Intel(R) HD Graphics Skylake ULT GT2", { 0, 16, 4 } },
- { "Iris", { 0, 16, 2 } },
- { "Iris Pro", { 1, 16, 2 } },
- { "default", { 1, 16, 2 } },
- }
- },
- { // Intel accelerators
- kDeviceTypeAccelerator, "Intel", {
- { "Intel(R) Many Integrated Core Acceleration Card", { 1, 16, 1 } },
- { "default", { 1, 16, 1 } },
- }
- },
- { // NVIDIA GPUs
- kDeviceTypeGPU, "NVIDIA", {
- { "GRID K520", { 1, 16, 1 } },
- { "GeForce GTX 1070", { 1, 16, 1 } },
- { "GeForce GTX 1080", { 0, 8, 1 } },
- { "GeForce GTX 480", { 1, 16, 1 } },
- { "GeForce GTX 670", { 1, 16, 1 } },
- { "GeForce GTX 680", { 1, 16, 1 } },
- { "GeForce GTX 750", { 1, 16, 2 } },
- { "GeForce GTX 750 Ti", { 1, 16, 1 } },
- { "GeForce GTX 980", { 0, 16, 1 } },
- { "GeForce GTX TITAN", { 1, 16, 1 } },
- { "GeForce GTX TITAN Black", { 0, 16, 1 } },
- { "GeForce GTX TITAN X", { 1, 32, 1 } },
- { "TITAN X (Pascal)", { 1, 8, 1 } },
- { "Tesla K20m", { 0, 16, 1 } },
- { "Tesla K40m", { 1, 16, 1 } },
- { "default", { 1, 16, 1 } },
- }
- },
- { // QUALCOMM GPUs
- kDeviceTypeGPU, "QUALCOMM", {
- { "QUALCOMM Adreno(TM)", { 0, 8, 4 } },
- { "default", { 0, 8, 4 } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { 1, 8, 2 } },
- }
- },
- }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry PadtransposeDouble = {
- "Padtranspose", Precision::kDouble, {"PADTRA_PAD", "PADTRA_TILE", "PADTRA_WPT"}, {
- { // AMD GPUs
- kDeviceTypeGPU, "AMD", {
- { "AMD Radeon R9 M370X Compute Engine", { 0, 16, 4 } },
- { "Ellesmere", { 0, 16, 4 } },
- { "Fiji", { 0, 16, 2 } },
- { "Hawaii", { 0, 16, 2 } },
- { "Oland", { 0, 16, 4 } },
- { "Pitcairn", { 0, 8, 4 } },
- { "Tahiti", { 1, 16, 2 } },
- { "Tonga", { 0, 8, 2 } },
- { "default", { 0, 16, 4 } },
- }
- },
- { // ARM GPUs
- kDeviceTypeGPU, "ARM", {
- { "Mali-T628", { 0, 16, 2 } },
- { "default", { 0, 16, 2 } },
- }
- },
- { // Intel CPUs
- kDeviceTypeCPU, "Intel", {
- { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 0, 8, 8 } },
- { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1, 8, 4 } },
- { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 0, 64, 1 } },
- { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 0, 8, 8 } },
- { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 0, 8, 8 } },
- { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 1, 32, 1 } },
- { "default", { 1, 8, 4 } },
- }
- },
- { // Intel accelerators
- kDeviceTypeAccelerator, "Intel", {
- { "Intel(R) Many Integrated Core Acceleration Card", { 0, 16, 1 } },
- { "default", { 0, 16, 1 } },
- }
- },
- { // NVIDIA GPUs
- kDeviceTypeGPU, "NVIDIA", {
- { "GRID K520", { 1, 16, 1 } },
- { "GeForce GTX 1070", { 1, 16, 1 } },
- { "GeForce GTX 1080", { 0, 8, 1 } },
- { "GeForce GTX 480", { 1, 16, 1 } },
- { "GeForce GTX 670", { 1, 16, 1 } },
- { "GeForce GTX 680", { 1, 16, 1 } },
- { "GeForce GTX 750", { 1, 16, 2 } },
- { "GeForce GTX 750 Ti", { 1, 32, 2 } },
- { "GeForce GTX 980", { 1, 32, 1 } },
- { "GeForce GTX TITAN", { 0, 16, 1 } },
- { "GeForce GTX TITAN Black", { 0, 16, 1 } },
- { "GeForce GTX TITAN X", { 1, 32, 1 } },
- { "TITAN X (Pascal)", { 0, 8, 1 } },
- { "Tesla K20m", { 0, 16, 1 } },
- { "Tesla K40m", { 1, 16, 1 } },
- { "default", { 1, 16, 1 } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { 1, 16, 2 } },
- }
- },
- }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry PadtransposeComplexDouble = {
- "Padtranspose", Precision::kComplexDouble, {"PADTRA_PAD", "PADTRA_TILE", "PADTRA_WPT"}, {
- { // AMD GPUs
- kDeviceTypeGPU, "AMD", {
- { "AMD Radeon R9 M370X Compute Engine", { 0, 8, 4 } },
- { "Ellesmere", { 0, 8, 4 } },
- { "Fiji", { 0, 8, 2 } },
- { "Hawaii", { 0, 8, 4 } },
- { "Oland", { 0, 8, 4 } },
- { "Pitcairn", { 0, 8, 4 } },
- { "Tahiti", { 0, 8, 2 } },
- { "Tonga", { 0, 8, 2 } },
- { "default", { 0, 8, 4 } },
- }
- },
- { // ARM GPUs
- kDeviceTypeGPU, "ARM", {
- { "Mali-T628", { 0, 8, 1 } },
- { "default", { 0, 8, 1 } },
- }
- },
- { // Intel CPUs
- kDeviceTypeCPU, "Intel", {
- { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 0, 8, 4 } },
- { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1, 8, 2 } },
- { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 1, 16, 2 } },
- { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 1, 8, 4 } },
- { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 0, 8, 4 } },
- { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 1, 8, 4 } },
- { "default", { 0, 8, 4 } },
- }
- },
- { // Intel accelerators
- kDeviceTypeAccelerator, "Intel", {
- { "Intel(R) Many Integrated Core Acceleration Card", { 0, 16, 1 } },
- { "default", { 0, 16, 1 } },
- }
- },
- { // NVIDIA GPUs
- kDeviceTypeGPU, "NVIDIA", {
- { "GRID K520", { 1, 16, 1 } },
- { "GeForce GTX 1070", { 1, 16, 1 } },
- { "GeForce GTX 1080", { 1, 8, 1 } },
- { "GeForce GTX 480", { 1, 16, 1 } },
- { "GeForce GTX 670", { 1, 16, 1 } },
- { "GeForce GTX 680", { 1, 32, 1 } },
- { "GeForce GTX 750", { 1, 16, 1 } },
- { "GeForce GTX 750 Ti", { 1, 8, 2 } },
- { "GeForce GTX 980", { 0, 16, 1 } },
- { "GeForce GTX TITAN", { 1, 16, 1 } },
- { "GeForce GTX TITAN Black", { 0, 16, 1 } },
- { "GeForce GTX TITAN X", { 1, 32, 1 } },
- { "TITAN X (Pascal)", { 1, 8, 1 } },
- { "Tesla K20m", { 1, 16, 1 } },
- { "Tesla K40m", { 1, 16, 1 } },
- { "default", { 1, 16, 1 } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { 0, 8, 2 } },
- }
- },
- }
-};
-
-// =================================================================================================
-} // namespace database
-} // namespace clblast
diff --git a/src/database/kernels/padtranspose/padtranspose.hpp b/src/database/kernels/padtranspose/padtranspose.hpp
new file mode 100644
index 00000000..c395653a
--- /dev/null
+++ b/src/database/kernels/padtranspose/padtranspose.hpp
@@ -0,0 +1,14 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Padtranspose' kernels.
+//
+// =================================================================================================
+
+#include "database/kernels/padtranspose/padtranspose_16.hpp"
+#include "database/kernels/padtranspose/padtranspose_32.hpp"
+#include "database/kernels/padtranspose/padtranspose_3232.hpp"
+#include "database/kernels/padtranspose/padtranspose_64.hpp"
+#include "database/kernels/padtranspose/padtranspose_6464.hpp"
diff --git a/src/database/kernels/padtranspose/padtranspose_16.hpp b/src/database/kernels/padtranspose/padtranspose_16.hpp
new file mode 100644
index 00000000..e815ced0
--- /dev/null
+++ b/src/database/kernels/padtranspose/padtranspose_16.hpp
@@ -0,0 +1,43 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Padtranspose16' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry PadtransposeHalf = {
+ "Padtranspose", Precision::kHalf, {"PADTRA_PAD", "PADTRA_TILE", "PADTRA_WPT"}, {
+ { // AMD GPUs
+ kDeviceTypeGPU, "AMD", {
+ { "Ellesmere", { 0, 16, 4 } },
+ { "default", { 0, 16, 4 } },
+ }
+ },
+ { // Intel GPUs
+ kDeviceTypeGPU, "Intel", {
+ { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 0, 8, 1 } },
+ { "Intel(R) HD Graphics Skylake ULT GT2", { 1, 8, 2 } },
+ { "default", { 0, 8, 1 } },
+ }
+ },
+ { // QUALCOMM GPUs
+ kDeviceTypeGPU, "QUALCOMM", {
+ { "QUALCOMM Adreno(TM)", { 0, 8, 8 } },
+ { "default", { 0, 8, 8 } },
+ }
+ },
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { 0, 8, 1 } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/database/kernels/padtranspose/padtranspose_32.hpp b/src/database/kernels/padtranspose/padtranspose_32.hpp
new file mode 100644
index 00000000..ca04b01e
--- /dev/null
+++ b/src/database/kernels/padtranspose/padtranspose_32.hpp
@@ -0,0 +1,100 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Padtranspose32' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry PadtransposeSingle = {
+ "Padtranspose", Precision::kSingle, {"PADTRA_PAD", "PADTRA_TILE", "PADTRA_WPT"}, {
+ { // AMD GPUs
+ kDeviceTypeGPU, "AMD", {
+ { "AMD Radeon R9 M370X Compute Engine", { 0, 16, 4 } },
+ { "ATI Radeon HD 6750M", { 1, 16, 1 } },
+ { "Ellesmere", { 1, 8, 4 } },
+ { "Fiji", { 0, 16, 2 } },
+ { "Hawaii", { 1, 16, 4 } },
+ { "Oland", { 0, 16, 4 } },
+ { "Pitcairn", { 0, 16, 4 } },
+ { "Tahiti", { 0, 16, 4 } },
+ { "Tonga", { 0, 16, 2 } },
+ { "Turks", { 1, 16, 1 } },
+ { "default", { 0, 16, 4 } },
+ }
+ },
+ { // ARM GPUs
+ kDeviceTypeGPU, "ARM", {
+ { "Mali-T628", { 0, 8, 2 } },
+ { "default", { 0, 8, 2 } },
+ }
+ },
+ { // Intel CPUs
+ kDeviceTypeCPU, "Intel", {
+ { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 0, 8, 8 } },
+ { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 0, 16, 1 } },
+ { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 0, 32, 1 } },
+ { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 0, 8, 8 } },
+ { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 0, 8, 8 } },
+ { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 0, 32, 1 } },
+ { "default", { 0, 8, 8 } },
+ }
+ },
+ { // Intel GPUs
+ kDeviceTypeGPU, "Intel", {
+ { "Intel(R) HD Graphics 530", { 1, 16, 2 } },
+ { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 0, 16, 4 } },
+ { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 1, 16, 2 } },
+ { "Intel(R) HD Graphics IvyBridge M GT2", { 0, 16, 4 } },
+ { "Intel(R) HD Graphics Skylake ULT GT2", { 1, 16, 2 } },
+ { "Iris", { 1, 16, 2 } },
+ { "Iris Pro", { 1, 16, 2 } },
+ { "default", { 1, 16, 2 } },
+ }
+ },
+ { // Intel accelerators
+ kDeviceTypeAccelerator, "Intel", {
+ { "Intel(R) Many Integrated Core Acceleration Card", { 0, 16, 2 } },
+ { "default", { 0, 16, 2 } },
+ }
+ },
+ { // NVIDIA GPUs
+ kDeviceTypeGPU, "NVIDIA", {
+ { "GRID K520", { 1, 32, 2 } },
+ { "GeForce GTX 1070", { 0, 16, 1 } },
+ { "GeForce GTX 1080", { 1, 16, 2 } },
+ { "GeForce GTX 480", { 1, 16, 2 } },
+ { "GeForce GTX 670", { 1, 32, 2 } },
+ { "GeForce GTX 680", { 1, 16, 2 } },
+ { "GeForce GTX 750", { 1, 32, 2 } },
+ { "GeForce GTX 750 Ti", { 1, 32, 2 } },
+ { "GeForce GTX 980", { 0, 16, 1 } },
+ { "GeForce GTX TITAN", { 1, 16, 2 } },
+ { "GeForce GTX TITAN Black", { 1, 32, 2 } },
+ { "GeForce GTX TITAN X", { 1, 32, 1 } },
+ { "TITAN X (Pascal)", { 1, 16, 2 } },
+ { "Tesla K20m", { 1, 16, 2 } },
+ { "Tesla K40m", { 1, 32, 2 } },
+ { "default", { 1, 32, 2 } },
+ }
+ },
+ { // QUALCOMM GPUs
+ kDeviceTypeGPU, "QUALCOMM", {
+ { "QUALCOMM Adreno(TM)", { 0, 8, 2 } },
+ { "default", { 0, 8, 2 } },
+ }
+ },
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { 1, 16, 2 } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/database/kernels/padtranspose/padtranspose_3232.hpp b/src/database/kernels/padtranspose/padtranspose_3232.hpp
new file mode 100644
index 00000000..bc9425da
--- /dev/null
+++ b/src/database/kernels/padtranspose/padtranspose_3232.hpp
@@ -0,0 +1,100 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Padtranspose3232' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry PadtransposeComplexSingle = {
+ "Padtranspose", Precision::kComplexSingle, {"PADTRA_PAD", "PADTRA_TILE", "PADTRA_WPT"}, {
+ { // AMD GPUs
+ kDeviceTypeGPU, "AMD", {
+ { "AMD Radeon R9 M370X Compute Engine", { 0, 16, 4 } },
+ { "ATI Radeon HD 6750M", { 1, 16, 1 } },
+ { "Ellesmere", { 0, 8, 4 } },
+ { "Fiji", { 1, 16, 2 } },
+ { "Hawaii", { 0, 16, 2 } },
+ { "Oland", { 0, 8, 4 } },
+ { "Pitcairn", { 0, 8, 4 } },
+ { "Tahiti", { 0, 16, 2 } },
+ { "Tonga", { 0, 16, 2 } },
+ { "Turks", { 0, 16, 4 } },
+ { "default", { 0, 8, 4 } },
+ }
+ },
+ { // ARM GPUs
+ kDeviceTypeGPU, "ARM", {
+ { "Mali-T628", { 1, 16, 2 } },
+ { "default", { 1, 16, 2 } },
+ }
+ },
+ { // Intel CPUs
+ kDeviceTypeCPU, "Intel", {
+ { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 0, 8, 8 } },
+ { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1, 8, 4 } },
+ { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 0, 8, 4 } },
+ { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 0, 8, 8 } },
+ { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 0, 8, 8 } },
+ { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 0, 8, 4 } },
+ { "default", { 0, 8, 8 } },
+ }
+ },
+ { // Intel GPUs
+ kDeviceTypeGPU, "Intel", {
+ { "Intel(R) HD Graphics 530", { 1, 16, 2 } },
+ { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 0, 16, 2 } },
+ { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 1, 16, 2 } },
+ { "Intel(R) HD Graphics IvyBridge M GT2", { 0, 16, 2 } },
+ { "Intel(R) HD Graphics Skylake ULT GT2", { 0, 16, 4 } },
+ { "Iris", { 0, 16, 2 } },
+ { "Iris Pro", { 1, 16, 2 } },
+ { "default", { 1, 16, 2 } },
+ }
+ },
+ { // Intel accelerators
+ kDeviceTypeAccelerator, "Intel", {
+ { "Intel(R) Many Integrated Core Acceleration Card", { 1, 16, 1 } },
+ { "default", { 1, 16, 1 } },
+ }
+ },
+ { // NVIDIA GPUs
+ kDeviceTypeGPU, "NVIDIA", {
+ { "GRID K520", { 1, 16, 1 } },
+ { "GeForce GTX 1070", { 1, 16, 1 } },
+ { "GeForce GTX 1080", { 0, 8, 1 } },
+ { "GeForce GTX 480", { 1, 16, 1 } },
+ { "GeForce GTX 670", { 1, 16, 1 } },
+ { "GeForce GTX 680", { 1, 16, 1 } },
+ { "GeForce GTX 750", { 1, 16, 2 } },
+ { "GeForce GTX 750 Ti", { 1, 16, 1 } },
+ { "GeForce GTX 980", { 0, 16, 1 } },
+ { "GeForce GTX TITAN", { 1, 16, 1 } },
+ { "GeForce GTX TITAN Black", { 0, 16, 1 } },
+ { "GeForce GTX TITAN X", { 1, 32, 1 } },
+ { "TITAN X (Pascal)", { 1, 8, 1 } },
+ { "Tesla K20m", { 0, 16, 1 } },
+ { "Tesla K40m", { 1, 16, 1 } },
+ { "default", { 1, 16, 1 } },
+ }
+ },
+ { // QUALCOMM GPUs
+ kDeviceTypeGPU, "QUALCOMM", {
+ { "QUALCOMM Adreno(TM)", { 0, 8, 4 } },
+ { "default", { 0, 8, 4 } },
+ }
+ },
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { 1, 8, 2 } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/database/kernels/padtranspose/padtranspose_64.hpp b/src/database/kernels/padtranspose/padtranspose_64.hpp
new file mode 100644
index 00000000..bdfe9788
--- /dev/null
+++ b/src/database/kernels/padtranspose/padtranspose_64.hpp
@@ -0,0 +1,80 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Padtranspose64' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry PadtransposeDouble = {
+ "Padtranspose", Precision::kDouble, {"PADTRA_PAD", "PADTRA_TILE", "PADTRA_WPT"}, {
+ { // AMD GPUs
+ kDeviceTypeGPU, "AMD", {
+ { "AMD Radeon R9 M370X Compute Engine", { 0, 16, 4 } },
+ { "Ellesmere", { 0, 16, 4 } },
+ { "Fiji", { 0, 16, 2 } },
+ { "Hawaii", { 0, 16, 2 } },
+ { "Oland", { 0, 16, 4 } },
+ { "Pitcairn", { 0, 8, 4 } },
+ { "Tahiti", { 1, 16, 2 } },
+ { "Tonga", { 0, 8, 2 } },
+ { "default", { 0, 16, 4 } },
+ }
+ },
+ { // ARM GPUs
+ kDeviceTypeGPU, "ARM", {
+ { "Mali-T628", { 0, 16, 2 } },
+ { "default", { 0, 16, 2 } },
+ }
+ },
+ { // Intel CPUs
+ kDeviceTypeCPU, "Intel", {
+ { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 0, 8, 8 } },
+ { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1, 8, 4 } },
+ { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 0, 64, 1 } },
+ { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 0, 8, 8 } },
+ { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 0, 8, 8 } },
+ { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 1, 32, 1 } },
+ { "default", { 1, 8, 4 } },
+ }
+ },
+ { // Intel accelerators
+ kDeviceTypeAccelerator, "Intel", {
+ { "Intel(R) Many Integrated Core Acceleration Card", { 0, 16, 1 } },
+ { "default", { 0, 16, 1 } },
+ }
+ },
+ { // NVIDIA GPUs
+ kDeviceTypeGPU, "NVIDIA", {
+ { "GRID K520", { 1, 16, 1 } },
+ { "GeForce GTX 1070", { 1, 16, 1 } },
+ { "GeForce GTX 1080", { 0, 8, 1 } },
+ { "GeForce GTX 480", { 1, 16, 1 } },
+ { "GeForce GTX 670", { 1, 16, 1 } },
+ { "GeForce GTX 680", { 1, 16, 1 } },
+ { "GeForce GTX 750", { 1, 16, 2 } },
+ { "GeForce GTX 750 Ti", { 1, 32, 2 } },
+ { "GeForce GTX 980", { 1, 32, 1 } },
+ { "GeForce GTX TITAN", { 0, 16, 1 } },
+ { "GeForce GTX TITAN Black", { 0, 16, 1 } },
+ { "GeForce GTX TITAN X", { 1, 32, 1 } },
+ { "TITAN X (Pascal)", { 0, 8, 1 } },
+ { "Tesla K20m", { 0, 16, 1 } },
+ { "Tesla K40m", { 1, 16, 1 } },
+ { "default", { 1, 16, 1 } },
+ }
+ },
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { 1, 16, 2 } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/database/kernels/padtranspose/padtranspose_6464.hpp b/src/database/kernels/padtranspose/padtranspose_6464.hpp
new file mode 100644
index 00000000..c839ab2c
--- /dev/null
+++ b/src/database/kernels/padtranspose/padtranspose_6464.hpp
@@ -0,0 +1,80 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Padtranspose6464' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry PadtransposeComplexDouble = {
+ "Padtranspose", Precision::kComplexDouble, {"PADTRA_PAD", "PADTRA_TILE", "PADTRA_WPT"}, {
+ { // AMD GPUs
+ kDeviceTypeGPU, "AMD", {
+ { "AMD Radeon R9 M370X Compute Engine", { 0, 8, 4 } },
+ { "Ellesmere", { 0, 8, 4 } },
+ { "Fiji", { 0, 8, 2 } },
+ { "Hawaii", { 0, 8, 4 } },
+ { "Oland", { 0, 8, 4 } },
+ { "Pitcairn", { 0, 8, 4 } },
+ { "Tahiti", { 0, 8, 2 } },
+ { "Tonga", { 0, 8, 2 } },
+ { "default", { 0, 8, 4 } },
+ }
+ },
+ { // ARM GPUs
+ kDeviceTypeGPU, "ARM", {
+ { "Mali-T628", { 0, 8, 1 } },
+ { "default", { 0, 8, 1 } },
+ }
+ },
+ { // Intel CPUs
+ kDeviceTypeCPU, "Intel", {
+ { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 0, 8, 4 } },
+ { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1, 8, 2 } },
+ { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 1, 16, 2 } },
+ { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 1, 8, 4 } },
+ { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 0, 8, 4 } },
+ { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 1, 8, 4 } },
+ { "default", { 0, 8, 4 } },
+ }
+ },
+ { // Intel accelerators
+ kDeviceTypeAccelerator, "Intel", {
+ { "Intel(R) Many Integrated Core Acceleration Card", { 0, 16, 1 } },
+ { "default", { 0, 16, 1 } },
+ }
+ },
+ { // NVIDIA GPUs
+ kDeviceTypeGPU, "NVIDIA", {
+ { "GRID K520", { 1, 16, 1 } },
+ { "GeForce GTX 1070", { 1, 16, 1 } },
+ { "GeForce GTX 1080", { 1, 8, 1 } },
+ { "GeForce GTX 480", { 1, 16, 1 } },
+ { "GeForce GTX 670", { 1, 16, 1 } },
+ { "GeForce GTX 680", { 1, 32, 1 } },
+ { "GeForce GTX 750", { 1, 16, 1 } },
+ { "GeForce GTX 750 Ti", { 1, 8, 2 } },
+ { "GeForce GTX 980", { 0, 16, 1 } },
+ { "GeForce GTX TITAN", { 1, 16, 1 } },
+ { "GeForce GTX TITAN Black", { 0, 16, 1 } },
+ { "GeForce GTX TITAN X", { 1, 32, 1 } },
+ { "TITAN X (Pascal)", { 1, 8, 1 } },
+ { "Tesla K20m", { 1, 16, 1 } },
+ { "Tesla K40m", { 1, 16, 1 } },
+ { "default", { 1, 16, 1 } },
+ }
+ },
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { 0, 8, 2 } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/database/kernels/transpose.hpp b/src/database/kernels/transpose.hpp
deleted file mode 100644
index b00a23dc..00000000
--- a/src/database/kernels/transpose.hpp
+++ /dev/null
@@ -1,350 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-// Database generator <database.py>
-//
-// This file populates the database with best-found tuning parameters for the 'Transpose' kernels.
-//
-// =================================================================================================
-
-namespace clblast {
-namespace database {
-// =================================================================================================
-
-const Database::DatabaseEntry TransposeHalf = {
- "Transpose", Precision::kHalf, {"TRA_DIM", "TRA_PAD", "TRA_SHUFFLE", "TRA_WPT"}, {
- { // AMD GPUs
- kDeviceTypeGPU, "AMD", {
- { "Ellesmere", { 4, 0, 1, 8 } },
- { "default", { 4, 0, 1, 8 } },
- }
- },
- { // Intel GPUs
- kDeviceTypeGPU, "Intel", {
- { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 8, 1, 1, 8 } },
- { "Intel(R) HD Graphics Skylake ULT GT2", { 16, 0, 0, 4 } },
- { "default", { 8, 1, 0, 8 } },
- }
- },
- { // QUALCOMM GPUs
- kDeviceTypeGPU, "QUALCOMM", {
- { "QUALCOMM Adreno(TM)", { 8, 0, 0, 4 } },
- { "default", { 8, 0, 0, 4 } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { 8, 0, 1, 8 } },
- }
- },
- }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry TransposeSingle = {
- "Transpose", Precision::kSingle, {"TRA_DIM", "TRA_PAD", "TRA_SHUFFLE", "TRA_WPT"}, {
- { // AMD GPUs
- kDeviceTypeGPU, "AMD", {
- { "AMD Radeon R9 M370X Compute Engine", { 4, 0, 1, 8 } },
- { "ATI Radeon HD 6750M", { 8, 0, 1, 2 } },
- { "Ellesmere", { 16, 0, 1, 4 } },
- { "Fiji", { 16, 0, 1, 2 } },
- { "Hawaii", { 4, 0, 1, 8 } },
- { "Oland", { 8, 0, 1, 4 } },
- { "Pitcairn", { 16, 0, 1, 1 } },
- { "Tahiti", { 4, 0, 1, 4 } },
- { "Tonga", { 8, 1, 1, 2 } },
- { "Turks", { 8, 0, 1, 2 } },
- { "default", { 8, 0, 1, 2 } },
- }
- },
- { // ARM GPUs
- kDeviceTypeGPU, "ARM", {
- { "Mali-T628", { 8, 0, 1, 4 } },
- { "default", { 8, 0, 1, 4 } },
- }
- },
- { // Intel CPUs
- kDeviceTypeCPU, "Intel", {
- { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 1, 0, 16 } },
- { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 4, 0, 0, 8 } },
- { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 4, 0, 1, 8 } },
- { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 4, 0, 0, 8 } },
- { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 1, 0, 16 } },
- { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 4, 0, 0, 8 } },
- { "default", { 4, 0, 0, 8 } },
- }
- },
- { // Intel GPUs
- kDeviceTypeGPU, "Intel", {
- { "Intel(R) HD Graphics 530", { 16, 0, 1, 4 } },
- { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 16, 0, 0, 4 } },
- { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 16, 0, 0, 4 } },
- { "Intel(R) HD Graphics IvyBridge M GT2", { 8, 0, 1, 4 } },
- { "Intel(R) HD Graphics Skylake ULT GT2", { 16, 0, 1, 2 } },
- { "Iris", { 8, 1, 0, 4 } },
- { "Iris Pro", { 16, 1, 0, 4 } },
- { "default", { 16, 0, 0, 4 } },
- }
- },
- { // Intel accelerators
- kDeviceTypeAccelerator, "Intel", {
- { "Intel(R) Many Integrated Core Acceleration Card", { 16, 1, 1, 1 } },
- { "default", { 16, 1, 1, 1 } },
- }
- },
- { // NVIDIA GPUs
- kDeviceTypeGPU, "NVIDIA", {
- { "GRID K520", { 32, 1, 1, 2 } },
- { "GeForce GT 650M", { 8, 1, 0, 4 } },
- { "GeForce GTX 1070", { 8, 0, 1, 4 } },
- { "GeForce GTX 1080", { 4, 0, 0, 4 } },
- { "GeForce GTX 480", { 16, 1, 0, 2 } },
- { "GeForce GTX 670", { 16, 1, 1, 2 } },
- { "GeForce GTX 680", { 16, 1, 1, 2 } },
- { "GeForce GTX 750", { 4, 0, 0, 8 } },
- { "GeForce GTX 750 Ti", { 32, 1, 0, 2 } },
- { "GeForce GTX 980", { 16, 0, 0, 1 } },
- { "GeForce GTX TITAN", { 8, 1, 0, 4 } },
- { "GeForce GTX TITAN Black", { 8, 1, 0, 4 } },
- { "GeForce GTX TITAN X", { 16, 0, 0, 4 } },
- { "TITAN X (Pascal)", { 8, 0, 0, 4 } },
- { "Tesla K20m", { 8, 0, 0, 4 } },
- { "Tesla K40m", { 8, 1, 0, 4 } },
- { "default", { 8, 1, 0, 4 } },
- }
- },
- { // QUALCOMM GPUs
- kDeviceTypeGPU, "QUALCOMM", {
- { "QUALCOMM Adreno(TM)", { 8, 1, 1, 4 } },
- { "default", { 8, 1, 1, 4 } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { 8, 0, 1, 4 } },
- }
- },
- }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry TransposeComplexSingle = {
- "Transpose", Precision::kComplexSingle, {"TRA_DIM", "TRA_PAD", "TRA_SHUFFLE", "TRA_WPT"}, {
- { // AMD GPUs
- kDeviceTypeGPU, "AMD", {
- { "AMD Radeon R9 M370X Compute Engine", { 4, 1, 1, 4 } },
- { "ATI Radeon HD 6750M", { 16, 1, 1, 1 } },
- { "Ellesmere", { 4, 0, 1, 4 } },
- { "Fiji", { 8, 1, 1, 2 } },
- { "Hawaii", { 16, 0, 1, 1 } },
- { "Oland", { 4, 0, 1, 2 } },
- { "Pitcairn", { 8, 0, 1, 1 } },
- { "Tahiti", { 16, 0, 1, 1 } },
- { "Tonga", { 16, 0, 1, 1 } },
- { "Turks", { 8, 1, 1, 4 } },
- { "default", { 8, 0, 1, 1 } },
- }
- },
- { // ARM GPUs
- kDeviceTypeGPU, "ARM", {
- { "Mali-T628", { 16, 0, 0, 2 } },
- { "default", { 16, 0, 0, 2 } },
- }
- },
- { // Intel CPUs
- kDeviceTypeCPU, "Intel", {
- { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 0, 1, 8 } },
- { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 8, 0, 0, 2 } },
- { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 4, 0, 0, 4 } },
- { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 4, 1, 0, 8 } },
- { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 1, 0, 8 } },
- { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 16, 0, 0, 4 } },
- { "default", { 4, 1, 0, 8 } },
- }
- },
- { // Intel GPUs
- kDeviceTypeGPU, "Intel", {
- { "Intel(R) HD Graphics 530", { 16, 1, 1, 2 } },
- { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 8, 0, 0, 2 } },
- { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 8, 0, 0, 2 } },
- { "Intel(R) HD Graphics IvyBridge M GT2", { 16, 1, 1, 2 } },
- { "Intel(R) HD Graphics Skylake ULT GT2", { 16, 1, 0, 2 } },
- { "Iris", { 8, 0, 0, 2 } },
- { "Iris Pro", { 16, 1, 0, 2 } },
- { "default", { 16, 1, 0, 2 } },
- }
- },
- { // NVIDIA GPUs
- kDeviceTypeGPU, "NVIDIA", {
- { "GRID K520", { 16, 1, 1, 1 } },
- { "GeForce GTX 1070", { 16, 1, 1, 1 } },
- { "GeForce GTX 1080", { 16, 1, 0, 1 } },
- { "GeForce GTX 480", { 16, 1, 0, 1 } },
- { "GeForce GTX 670", { 16, 1, 1, 1 } },
- { "GeForce GTX 680", { 16, 1, 1, 1 } },
- { "GeForce GTX 750", { 16, 1, 0, 1 } },
- { "GeForce GTX 750 Ti", { 16, 1, 0, 1 } },
- { "GeForce GTX 980", { 16, 1, 0, 1 } },
- { "GeForce GTX TITAN", { 16, 0, 0, 1 } },
- { "GeForce GTX TITAN Black", { 16, 1, 0, 1 } },
- { "GeForce GTX TITAN X", { 32, 1, 0, 1 } },
- { "TITAN X (Pascal)", { 8, 1, 0, 2 } },
- { "Tesla K20m", { 16, 0, 0, 1 } },
- { "Tesla K40m", { 16, 1, 0, 1 } },
- { "default", { 16, 1, 0, 1 } },
- }
- },
- { // QUALCOMM GPUs
- kDeviceTypeGPU, "QUALCOMM", {
- { "QUALCOMM Adreno(TM)", { 16, 1, 0, 1 } },
- { "default", { 16, 1, 0, 1 } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { 8, 1, 1, 2 } },
- }
- },
- }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry TransposeDouble = {
- "Transpose", Precision::kDouble, {"TRA_DIM", "TRA_PAD", "TRA_SHUFFLE", "TRA_WPT"}, {
- { // AMD GPUs
- kDeviceTypeGPU, "AMD", {
- { "AMD Radeon R9 M370X Compute Engine", { 4, 0, 1, 4 } },
- { "Ellesmere", { 4, 0, 1, 4 } },
- { "Fiji", { 8, 1, 1, 2 } },
- { "Hawaii", { 16, 0, 1, 1 } },
- { "Oland", { 8, 1, 1, 2 } },
- { "Pitcairn", { 4, 0, 1, 2 } },
- { "Tahiti", { 4, 1, 1, 4 } },
- { "Tonga", { 4, 0, 1, 4 } },
- { "default", { 4, 0, 1, 4 } },
- }
- },
- { // ARM GPUs
- kDeviceTypeGPU, "ARM", {
- { "Mali-T628", { 8, 0, 0, 1 } },
- { "default", { 8, 0, 0, 1 } },
- }
- },
- { // Intel CPUs
- kDeviceTypeCPU, "Intel", {
- { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 1, 0, 8 } },
- { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 4, 0, 0, 4 } },
- { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 4, 1, 0, 4 } },
- { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 4, 1, 0, 8 } },
- { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 0, 0, 16 } },
- { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 4, 0, 0, 8 } },
- { "default", { 4, 1, 0, 8 } },
- }
- },
- { // Intel accelerators
- kDeviceTypeAccelerator, "Intel", {
- { "Intel(R) Many Integrated Core Acceleration Card", { 32, 1, 0, 1 } },
- { "default", { 32, 1, 0, 1 } },
- }
- },
- { // NVIDIA GPUs
- kDeviceTypeGPU, "NVIDIA", {
- { "GRID K520", { 16, 1, 1, 2 } },
- { "GeForce GTX 1070", { 8, 0, 1, 2 } },
- { "GeForce GTX 1080", { 8, 0, 0, 2 } },
- { "GeForce GTX 480", { 8, 1, 0, 2 } },
- { "GeForce GTX 670", { 16, 1, 1, 2 } },
- { "GeForce GTX 680", { 16, 1, 1, 2 } },
- { "GeForce GTX 750", { 16, 1, 0, 1 } },
- { "GeForce GTX 750 Ti", { 32, 1, 0, 2 } },
- { "GeForce GTX 980", { 16, 0, 0, 2 } },
- { "GeForce GTX TITAN", { 8, 0, 0, 2 } },
- { "GeForce GTX TITAN Black", { 16, 1, 0, 2 } },
- { "GeForce GTX TITAN X", { 32, 1, 0, 1 } },
- { "TITAN X (Pascal)", { 16, 1, 0, 2 } },
- { "Tesla K20m", { 16, 1, 0, 2 } },
- { "Tesla K40m", { 16, 1, 1, 2 } },
- { "default", { 16, 1, 1, 2 } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { 16, 1, 1, 2 } },
- }
- },
- }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry TransposeComplexDouble = {
- "Transpose", Precision::kComplexDouble, {"TRA_DIM", "TRA_PAD", "TRA_SHUFFLE", "TRA_WPT"}, {
- { // AMD GPUs
- kDeviceTypeGPU, "AMD", {
- { "AMD Radeon R9 M370X Compute Engine", { 4, 1, 1, 2 } },
- { "Ellesmere", { 16, 0, 1, 1 } },
- { "Fiji", { 16, 0, 1, 1 } },
- { "Hawaii", { 4, 0, 1, 2 } },
- { "Oland", { 16, 0, 1, 1 } },
- { "Pitcairn", { 4, 0, 1, 1 } },
- { "Tahiti", { 16, 0, 1, 1 } },
- { "Tonga", { 8, 1, 1, 2 } },
- { "default", { 16, 0, 1, 1 } },
- }
- },
- { // ARM GPUs
- kDeviceTypeGPU, "ARM", {
- { "Mali-T628", { 8, 0, 0, 1 } },
- { "default", { 8, 0, 0, 1 } },
- }
- },
- { // Intel CPUs
- kDeviceTypeCPU, "Intel", {
- { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 0, 1, 8 } },
- { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 4, 0, 0, 4 } },
- { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 4, 0, 0, 4 } },
- { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 4, 1, 0, 4 } },
- { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 0, 1, 8 } },
- { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 16, 1, 0, 2 } },
- { "default", { 4, 0, 0, 8 } },
- }
- },
- { // NVIDIA GPUs
- kDeviceTypeGPU, "NVIDIA", {
- { "GRID K520", { 16, 1, 1, 1 } },
- { "GeForce GTX 1070", { 8, 0, 0, 1 } },
- { "GeForce GTX 1080", { 8, 0, 0, 1 } },
- { "GeForce GTX 480", { 8, 1, 0, 1 } },
- { "GeForce GTX 670", { 16, 1, 1, 1 } },
- { "GeForce GTX 680", { 16, 1, 1, 1 } },
- { "GeForce GTX 750", { 16, 1, 0, 1 } },
- { "GeForce GTX 750 Ti", { 16, 1, 0, 1 } },
- { "GeForce GTX 980", { 32, 1, 0, 1 } },
- { "GeForce GTX TITAN", { 16, 1, 0, 1 } },
- { "GeForce GTX TITAN Black", { 16, 0, 0, 1 } },
- { "GeForce GTX TITAN X", { 32, 1, 0, 1 } },
- { "TITAN X (Pascal)", { 8, 0, 0, 1 } },
- { "Tesla K20m", { 16, 1, 0, 1 } },
- { "Tesla K40m", { 16, 1, 0, 1 } },
- { "default", { 16, 1, 0, 1 } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { 16, 1, 1, 1 } },
- }
- },
- }
-};
-
-// =================================================================================================
-} // namespace database
-} // namespace clblast
diff --git a/src/database/kernels/transpose/transpose.hpp b/src/database/kernels/transpose/transpose.hpp
new file mode 100644
index 00000000..fa262c50
--- /dev/null
+++ b/src/database/kernels/transpose/transpose.hpp
@@ -0,0 +1,14 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Transpose' kernels.
+//
+// =================================================================================================
+
+#include "database/kernels/transpose/transpose_16.hpp"
+#include "database/kernels/transpose/transpose_32.hpp"
+#include "database/kernels/transpose/transpose_3232.hpp"
+#include "database/kernels/transpose/transpose_64.hpp"
+#include "database/kernels/transpose/transpose_6464.hpp"
diff --git a/src/database/kernels/transpose/transpose_16.hpp b/src/database/kernels/transpose/transpose_16.hpp
new file mode 100644
index 00000000..016788dc
--- /dev/null
+++ b/src/database/kernels/transpose/transpose_16.hpp
@@ -0,0 +1,43 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Transpose16' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry TransposeHalf = {
+ "Transpose", Precision::kHalf, {"TRA_DIM", "TRA_PAD", "TRA_SHUFFLE", "TRA_WPT"}, {
+ { // AMD GPUs
+ kDeviceTypeGPU, "AMD", {
+ { "Ellesmere", { 4, 0, 1, 8 } },
+ { "default", { 4, 0, 1, 8 } },
+ }
+ },
+ { // Intel GPUs
+ kDeviceTypeGPU, "Intel", {
+ { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 8, 1, 1, 8 } },
+ { "Intel(R) HD Graphics Skylake ULT GT2", { 16, 0, 0, 4 } },
+ { "default", { 8, 1, 0, 8 } },
+ }
+ },
+ { // QUALCOMM GPUs
+ kDeviceTypeGPU, "QUALCOMM", {
+ { "QUALCOMM Adreno(TM)", { 8, 0, 0, 4 } },
+ { "default", { 8, 0, 0, 4 } },
+ }
+ },
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { 8, 0, 1, 8 } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/database/kernels/transpose/transpose_32.hpp b/src/database/kernels/transpose/transpose_32.hpp
new file mode 100644
index 00000000..abbe8e19
--- /dev/null
+++ b/src/database/kernels/transpose/transpose_32.hpp
@@ -0,0 +1,101 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Transpose32' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry TransposeSingle = {
+ "Transpose", Precision::kSingle, {"TRA_DIM", "TRA_PAD", "TRA_SHUFFLE", "TRA_WPT"}, {
+ { // AMD GPUs
+ kDeviceTypeGPU, "AMD", {
+ { "AMD Radeon R9 M370X Compute Engine", { 4, 0, 1, 8 } },
+ { "ATI Radeon HD 6750M", { 8, 0, 1, 2 } },
+ { "Ellesmere", { 16, 0, 1, 4 } },
+ { "Fiji", { 16, 0, 1, 2 } },
+ { "Hawaii", { 4, 0, 1, 8 } },
+ { "Oland", { 8, 0, 1, 4 } },
+ { "Pitcairn", { 16, 0, 1, 1 } },
+ { "Tahiti", { 4, 0, 1, 4 } },
+ { "Tonga", { 8, 1, 1, 2 } },
+ { "Turks", { 8, 0, 1, 2 } },
+ { "default", { 8, 0, 1, 2 } },
+ }
+ },
+ { // ARM GPUs
+ kDeviceTypeGPU, "ARM", {
+ { "Mali-T628", { 8, 0, 1, 4 } },
+ { "default", { 8, 0, 1, 4 } },
+ }
+ },
+ { // Intel CPUs
+ kDeviceTypeCPU, "Intel", {
+ { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 1, 0, 16 } },
+ { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 4, 0, 0, 8 } },
+ { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 4, 0, 1, 8 } },
+ { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 4, 0, 0, 8 } },
+ { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 1, 0, 16 } },
+ { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 4, 0, 0, 8 } },
+ { "default", { 4, 0, 0, 8 } },
+ }
+ },
+ { // Intel GPUs
+ kDeviceTypeGPU, "Intel", {
+ { "Intel(R) HD Graphics 530", { 16, 0, 1, 4 } },
+ { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 16, 0, 0, 4 } },
+ { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 16, 0, 0, 4 } },
+ { "Intel(R) HD Graphics IvyBridge M GT2", { 8, 0, 1, 4 } },
+ { "Intel(R) HD Graphics Skylake ULT GT2", { 16, 0, 1, 2 } },
+ { "Iris", { 8, 1, 0, 4 } },
+ { "Iris Pro", { 16, 1, 0, 4 } },
+ { "default", { 16, 0, 0, 4 } },
+ }
+ },
+ { // Intel accelerators
+ kDeviceTypeAccelerator, "Intel", {
+ { "Intel(R) Many Integrated Core Acceleration Card", { 16, 1, 1, 1 } },
+ { "default", { 16, 1, 1, 1 } },
+ }
+ },
+ { // NVIDIA GPUs
+ kDeviceTypeGPU, "NVIDIA", {
+ { "GRID K520", { 32, 1, 1, 2 } },
+ { "GeForce GT 650M", { 8, 1, 0, 4 } },
+ { "GeForce GTX 1070", { 8, 0, 1, 4 } },
+ { "GeForce GTX 1080", { 4, 0, 0, 4 } },
+ { "GeForce GTX 480", { 16, 1, 0, 2 } },
+ { "GeForce GTX 670", { 16, 1, 1, 2 } },
+ { "GeForce GTX 680", { 16, 1, 1, 2 } },
+ { "GeForce GTX 750", { 4, 0, 0, 8 } },
+ { "GeForce GTX 750 Ti", { 32, 1, 0, 2 } },
+ { "GeForce GTX 980", { 16, 0, 0, 1 } },
+ { "GeForce GTX TITAN", { 8, 1, 0, 4 } },
+ { "GeForce GTX TITAN Black", { 8, 1, 0, 4 } },
+ { "GeForce GTX TITAN X", { 16, 0, 0, 4 } },
+ { "TITAN X (Pascal)", { 8, 0, 0, 4 } },
+ { "Tesla K20m", { 8, 0, 0, 4 } },
+ { "Tesla K40m", { 8, 1, 0, 4 } },
+ { "default", { 8, 1, 0, 4 } },
+ }
+ },
+ { // QUALCOMM GPUs
+ kDeviceTypeGPU, "QUALCOMM", {
+ { "QUALCOMM Adreno(TM)", { 8, 1, 1, 4 } },
+ { "default", { 8, 1, 1, 4 } },
+ }
+ },
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { 8, 0, 1, 4 } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/database/kernels/transpose/transpose_3232.hpp b/src/database/kernels/transpose/transpose_3232.hpp
new file mode 100644
index 00000000..f8560206
--- /dev/null
+++ b/src/database/kernels/transpose/transpose_3232.hpp
@@ -0,0 +1,94 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Transpose3232' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry TransposeComplexSingle = {
+ "Transpose", Precision::kComplexSingle, {"TRA_DIM", "TRA_PAD", "TRA_SHUFFLE", "TRA_WPT"}, {
+ { // AMD GPUs
+ kDeviceTypeGPU, "AMD", {
+ { "AMD Radeon R9 M370X Compute Engine", { 4, 1, 1, 4 } },
+ { "ATI Radeon HD 6750M", { 16, 1, 1, 1 } },
+ { "Ellesmere", { 4, 0, 1, 4 } },
+ { "Fiji", { 8, 1, 1, 2 } },
+ { "Hawaii", { 16, 0, 1, 1 } },
+ { "Oland", { 4, 0, 1, 2 } },
+ { "Pitcairn", { 8, 0, 1, 1 } },
+ { "Tahiti", { 16, 0, 1, 1 } },
+ { "Tonga", { 16, 0, 1, 1 } },
+ { "Turks", { 8, 1, 1, 4 } },
+ { "default", { 8, 0, 1, 1 } },
+ }
+ },
+ { // ARM GPUs
+ kDeviceTypeGPU, "ARM", {
+ { "Mali-T628", { 16, 0, 0, 2 } },
+ { "default", { 16, 0, 0, 2 } },
+ }
+ },
+ { // Intel CPUs
+ kDeviceTypeCPU, "Intel", {
+ { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 0, 1, 8 } },
+ { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 8, 0, 0, 2 } },
+ { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 4, 0, 0, 4 } },
+ { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 4, 1, 0, 8 } },
+ { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 1, 0, 8 } },
+ { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 16, 0, 0, 4 } },
+ { "default", { 4, 1, 0, 8 } },
+ }
+ },
+ { // Intel GPUs
+ kDeviceTypeGPU, "Intel", {
+ { "Intel(R) HD Graphics 530", { 16, 1, 1, 2 } },
+ { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 8, 0, 0, 2 } },
+ { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 8, 0, 0, 2 } },
+ { "Intel(R) HD Graphics IvyBridge M GT2", { 16, 1, 1, 2 } },
+ { "Intel(R) HD Graphics Skylake ULT GT2", { 16, 1, 0, 2 } },
+ { "Iris", { 8, 0, 0, 2 } },
+ { "Iris Pro", { 16, 1, 0, 2 } },
+ { "default", { 16, 1, 0, 2 } },
+ }
+ },
+ { // NVIDIA GPUs
+ kDeviceTypeGPU, "NVIDIA", {
+ { "GRID K520", { 16, 1, 1, 1 } },
+ { "GeForce GTX 1070", { 16, 1, 1, 1 } },
+ { "GeForce GTX 1080", { 16, 1, 0, 1 } },
+ { "GeForce GTX 480", { 16, 1, 0, 1 } },
+ { "GeForce GTX 670", { 16, 1, 1, 1 } },
+ { "GeForce GTX 680", { 16, 1, 1, 1 } },
+ { "GeForce GTX 750", { 16, 1, 0, 1 } },
+ { "GeForce GTX 750 Ti", { 16, 1, 0, 1 } },
+ { "GeForce GTX 980", { 16, 1, 0, 1 } },
+ { "GeForce GTX TITAN", { 16, 0, 0, 1 } },
+ { "GeForce GTX TITAN Black", { 16, 1, 0, 1 } },
+ { "GeForce GTX TITAN X", { 32, 1, 0, 1 } },
+ { "TITAN X (Pascal)", { 8, 1, 0, 2 } },
+ { "Tesla K20m", { 16, 0, 0, 1 } },
+ { "Tesla K40m", { 16, 1, 0, 1 } },
+ { "default", { 16, 1, 0, 1 } },
+ }
+ },
+ { // QUALCOMM GPUs
+ kDeviceTypeGPU, "QUALCOMM", {
+ { "QUALCOMM Adreno(TM)", { 16, 1, 0, 1 } },
+ { "default", { 16, 1, 0, 1 } },
+ }
+ },
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { 8, 1, 1, 2 } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/database/kernels/transpose/transpose_64.hpp b/src/database/kernels/transpose/transpose_64.hpp
new file mode 100644
index 00000000..ddad3bac
--- /dev/null
+++ b/src/database/kernels/transpose/transpose_64.hpp
@@ -0,0 +1,80 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Transpose64' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry TransposeDouble = {
+ "Transpose", Precision::kDouble, {"TRA_DIM", "TRA_PAD", "TRA_SHUFFLE", "TRA_WPT"}, {
+ { // AMD GPUs
+ kDeviceTypeGPU, "AMD", {
+ { "AMD Radeon R9 M370X Compute Engine", { 4, 0, 1, 4 } },
+ { "Ellesmere", { 4, 0, 1, 4 } },
+ { "Fiji", { 8, 1, 1, 2 } },
+ { "Hawaii", { 16, 0, 1, 1 } },
+ { "Oland", { 8, 1, 1, 2 } },
+ { "Pitcairn", { 4, 0, 1, 2 } },
+ { "Tahiti", { 4, 1, 1, 4 } },
+ { "Tonga", { 4, 0, 1, 4 } },
+ { "default", { 4, 0, 1, 4 } },
+ }
+ },
+ { // ARM GPUs
+ kDeviceTypeGPU, "ARM", {
+ { "Mali-T628", { 8, 0, 0, 1 } },
+ { "default", { 8, 0, 0, 1 } },
+ }
+ },
+ { // Intel CPUs
+ kDeviceTypeCPU, "Intel", {
+ { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 1, 0, 8 } },
+ { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 4, 0, 0, 4 } },
+ { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 4, 1, 0, 4 } },
+ { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 4, 1, 0, 8 } },
+ { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 0, 0, 16 } },
+ { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 4, 0, 0, 8 } },
+ { "default", { 4, 1, 0, 8 } },
+ }
+ },
+ { // Intel accelerators
+ kDeviceTypeAccelerator, "Intel", {
+ { "Intel(R) Many Integrated Core Acceleration Card", { 32, 1, 0, 1 } },
+ { "default", { 32, 1, 0, 1 } },
+ }
+ },
+ { // NVIDIA GPUs
+ kDeviceTypeGPU, "NVIDIA", {
+ { "GRID K520", { 16, 1, 1, 2 } },
+ { "GeForce GTX 1070", { 8, 0, 1, 2 } },
+ { "GeForce GTX 1080", { 8, 0, 0, 2 } },
+ { "GeForce GTX 480", { 8, 1, 0, 2 } },
+ { "GeForce GTX 670", { 16, 1, 1, 2 } },
+ { "GeForce GTX 680", { 16, 1, 1, 2 } },
+ { "GeForce GTX 750", { 16, 1, 0, 1 } },
+ { "GeForce GTX 750 Ti", { 32, 1, 0, 2 } },
+ { "GeForce GTX 980", { 16, 0, 0, 2 } },
+ { "GeForce GTX TITAN", { 8, 0, 0, 2 } },
+ { "GeForce GTX TITAN Black", { 16, 1, 0, 2 } },
+ { "GeForce GTX TITAN X", { 32, 1, 0, 1 } },
+ { "TITAN X (Pascal)", { 16, 1, 0, 2 } },
+ { "Tesla K20m", { 16, 1, 0, 2 } },
+ { "Tesla K40m", { 16, 1, 1, 2 } },
+ { "default", { 16, 1, 1, 2 } },
+ }
+ },
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { 16, 1, 1, 2 } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/database/kernels/transpose/transpose_6464.hpp b/src/database/kernels/transpose/transpose_6464.hpp
new file mode 100644
index 00000000..95c7fbba
--- /dev/null
+++ b/src/database/kernels/transpose/transpose_6464.hpp
@@ -0,0 +1,74 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Transpose6464' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry TransposeComplexDouble = {
+ "Transpose", Precision::kComplexDouble, {"TRA_DIM", "TRA_PAD", "TRA_SHUFFLE", "TRA_WPT"}, {
+ { // AMD GPUs
+ kDeviceTypeGPU, "AMD", {
+ { "AMD Radeon R9 M370X Compute Engine", { 4, 1, 1, 2 } },
+ { "Ellesmere", { 16, 0, 1, 1 } },
+ { "Fiji", { 16, 0, 1, 1 } },
+ { "Hawaii", { 4, 0, 1, 2 } },
+ { "Oland", { 16, 0, 1, 1 } },
+ { "Pitcairn", { 4, 0, 1, 1 } },
+ { "Tahiti", { 16, 0, 1, 1 } },
+ { "Tonga", { 8, 1, 1, 2 } },
+ { "default", { 16, 0, 1, 1 } },
+ }
+ },
+ { // ARM GPUs
+ kDeviceTypeGPU, "ARM", {
+ { "Mali-T628", { 8, 0, 0, 1 } },
+ { "default", { 8, 0, 0, 1 } },
+ }
+ },
+ { // Intel CPUs
+ kDeviceTypeCPU, "Intel", {
+ { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 0, 1, 8 } },
+ { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 4, 0, 0, 4 } },
+ { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 4, 0, 0, 4 } },
+ { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 4, 1, 0, 4 } },
+ { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 0, 1, 8 } },
+ { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 16, 1, 0, 2 } },
+ { "default", { 4, 0, 0, 8 } },
+ }
+ },
+ { // NVIDIA GPUs
+ kDeviceTypeGPU, "NVIDIA", {
+ { "GRID K520", { 16, 1, 1, 1 } },
+ { "GeForce GTX 1070", { 8, 0, 0, 1 } },
+ { "GeForce GTX 1080", { 8, 0, 0, 1 } },
+ { "GeForce GTX 480", { 8, 1, 0, 1 } },
+ { "GeForce GTX 670", { 16, 1, 1, 1 } },
+ { "GeForce GTX 680", { 16, 1, 1, 1 } },
+ { "GeForce GTX 750", { 16, 1, 0, 1 } },
+ { "GeForce GTX 750 Ti", { 16, 1, 0, 1 } },
+ { "GeForce GTX 980", { 32, 1, 0, 1 } },
+ { "GeForce GTX TITAN", { 16, 1, 0, 1 } },
+ { "GeForce GTX TITAN Black", { 16, 0, 0, 1 } },
+ { "GeForce GTX TITAN X", { 32, 1, 0, 1 } },
+ { "TITAN X (Pascal)", { 8, 0, 0, 1 } },
+ { "Tesla K20m", { 16, 1, 0, 1 } },
+ { "Tesla K40m", { 16, 1, 0, 1 } },
+ { "default", { 16, 1, 0, 1 } },
+ }
+ },
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { 16, 1, 1, 1 } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/database/kernels/xaxpy.hpp b/src/database/kernels/xaxpy.hpp
deleted file mode 100644
index 5cb225d1..00000000
--- a/src/database/kernels/xaxpy.hpp
+++ /dev/null
@@ -1,362 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-// Database generator <database.py>
-//
-// This file populates the database with best-found tuning parameters for the 'Xaxpy' kernels.
-//
-// =================================================================================================
-
-namespace clblast {
-namespace database {
-// =================================================================================================
-
-const Database::DatabaseEntry XaxpyHalf = {
- "Xaxpy", Precision::kHalf, {"VW", "WGS", "WPT"}, {
- { // AMD GPUs
- kDeviceTypeGPU, "AMD", {
- { "Ellesmere", { 4, 128, 4 } },
- { "default", { 4, 128, 4 } },
- }
- },
- { // Intel GPUs
- kDeviceTypeGPU, "Intel", {
- { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 1, 64, 1 } },
- { "Intel(R) HD Graphics Skylake ULT GT2", { 8, 64, 1 } },
- { "default", { 8, 64, 1 } },
- }
- },
- { // QUALCOMM GPUs
- kDeviceTypeGPU, "QUALCOMM", {
- { "QUALCOMM Adreno(TM)", { 8, 64, 1 } },
- { "default", { 8, 64, 1 } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { 8, 64, 1 } },
- }
- },
- }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry XaxpySingle = {
- "Xaxpy", Precision::kSingle, {"VW", "WGS", "WPT"}, {
- { // AMD GPUs
- kDeviceTypeGPU, "AMD", {
- { "AMD Radeon R9 M370X Compute Engine", { 1, 128, 1 } },
- { "ATI Radeon HD 6750M", { 1, 256, 2 } },
- { "Ellesmere", { 1, 64, 4 } },
- { "Fiji", { 4, 64, 1 } },
- { "Hawaii", { 2, 64, 2 } },
- { "Oland", { 1, 128, 1 } },
- { "Pitcairn", { 2, 128, 1 } },
- { "Tahiti", { 2, 64, 1 } },
- { "Tonga", { 1, 256, 8 } },
- { "Turks", { 2, 256, 1 } },
- { "default", { 2, 256, 1 } },
- }
- },
- { // ARM GPUs
- kDeviceTypeGPU, "ARM", {
- { "Mali-T628", { 4, 256, 1 } },
- { "default", { 4, 256, 1 } },
- }
- },
- { // Intel CPUs
- kDeviceTypeCPU, "Intel", {
- { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 8, 512, 1 } },
- { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1, 512, 1 } },
- { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 1, 128, 2 } },
- { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 4, 256, 1 } },
- { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 2, 1024, 1 } },
- { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 1, 128, 1 } },
- { "default", { 8, 512, 1 } },
- }
- },
- { // Intel GPUs
- kDeviceTypeGPU, "Intel", {
- { "Intel(R) HD Graphics 530", { 1, 128, 1 } },
- { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 1, 256, 1 } },
- { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 1, 64, 1 } },
- { "Intel(R) HD Graphics IvyBridge M GT2", { 1, 64, 1 } },
- { "Intel(R) HD Graphics Skylake ULT GT2", { 8, 512, 1 } },
- { "Iris", { 1, 64, 1 } },
- { "Iris Pro", { 1, 128, 2 } },
- { "default", { 4, 256, 1 } },
- }
- },
- { // Intel accelerators
- kDeviceTypeAccelerator, "Intel", {
- { "Intel(R) Many Integrated Core Acceleration Card", { 2, 1024, 2 } },
- { "default", { 2, 1024, 2 } },
- }
- },
- { // NVIDIA GPUs
- kDeviceTypeGPU, "NVIDIA", {
- { "GRID K520", { 2, 64, 1 } },
- { "GeForce GT 650M", { 2, 1024, 1 } },
- { "GeForce GTX 1070", { 1, 64, 4 } },
- { "GeForce GTX 1080", { 1, 256, 1 } },
- { "GeForce GTX 480", { 2, 128, 1 } },
- { "GeForce GTX 670", { 2, 64, 1 } },
- { "GeForce GTX 680", { 1, 128, 1 } },
- { "GeForce GTX 750", { 1, 64, 1 } },
- { "GeForce GTX 750 Ti", { 2, 64, 1 } },
- { "GeForce GTX 980", { 1, 1024, 1 } },
- { "GeForce GTX TITAN", { 4, 256, 1 } },
- { "GeForce GTX TITAN Black", { 4, 128, 4 } },
- { "GeForce GTX TITAN X", { 1, 64, 1 } },
- { "TITAN X (Pascal)", { 4, 128, 1 } },
- { "Tesla K20m", { 4, 128, 1 } },
- { "Tesla K40m", { 4, 128, 1 } },
- { "default", { 4, 1024, 1 } },
- }
- },
- { // QUALCOMM GPUs
- kDeviceTypeGPU, "QUALCOMM", {
- { "QUALCOMM Adreno(TM)", { 4, 128, 2 } },
- { "default", { 4, 128, 2 } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { 4, 64, 1 } },
- }
- },
- }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry XaxpyComplexSingle = {
- "Xaxpy", Precision::kComplexSingle, {"VW", "WGS", "WPT"}, {
- { // AMD GPUs
- kDeviceTypeGPU, "AMD", {
- { "AMD Radeon R9 M370X Compute Engine", { 2, 64, 8 } },
- { "ATI Radeon HD 6750M", { 1, 64, 1 } },
- { "Ellesmere", { 2, 256, 1 } },
- { "Fiji", { 1, 128, 2 } },
- { "Hawaii", { 1, 128, 2 } },
- { "Oland", { 1, 128, 1 } },
- { "Pitcairn", { 1, 64, 1 } },
- { "Tahiti", { 1, 64, 1 } },
- { "Tonga", { 1, 256, 8 } },
- { "Turks", { 2, 256, 1 } },
- { "default", { 1, 128, 1 } },
- }
- },
- { // ARM GPUs
- kDeviceTypeGPU, "ARM", {
- { "Mali-T628", { 1, 256, 1 } },
- { "default", { 1, 256, 1 } },
- }
- },
- { // Intel CPUs
- kDeviceTypeCPU, "Intel", {
- { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 1024, 1 } },
- { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 4, 256, 1 } },
- { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 4, 1024, 1 } },
- { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 1, 1024, 2 } },
- { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 1024, 1 } },
- { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 2, 1024, 1 } },
- { "default", { 8, 1024, 1 } },
- }
- },
- { // Intel GPUs
- kDeviceTypeGPU, "Intel", {
- { "Intel(R) HD Graphics 530", { 4, 64, 2 } },
- { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 1, 64, 1 } },
- { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 1, 64, 1 } },
- { "Intel(R) HD Graphics IvyBridge M GT2", { 1, 64, 1 } },
- { "Intel(R) HD Graphics Skylake ULT GT2", { 4, 64, 1 } },
- { "Iris", { 2, 128, 1 } },
- { "Iris Pro", { 1, 256, 8 } },
- { "default", { 4, 64, 1 } },
- }
- },
- { // Intel accelerators
- kDeviceTypeAccelerator, "Intel", {
- { "Intel(R) Many Integrated Core Acceleration Card", { 1, 1024, 1 } },
- { "default", { 1, 1024, 1 } },
- }
- },
- { // NVIDIA GPUs
- kDeviceTypeGPU, "NVIDIA", {
- { "GRID K520", { 1, 512, 1 } },
- { "GeForce GTX 1070", { 1, 64, 2 } },
- { "GeForce GTX 1080", { 2, 64, 1 } },
- { "GeForce GTX 480", { 1, 256, 1 } },
- { "GeForce GTX 670", { 1, 256, 1 } },
- { "GeForce GTX 680", { 1, 256, 1 } },
- { "GeForce GTX 750", { 1, 512, 1 } },
- { "GeForce GTX 750 Ti", { 1, 512, 1 } },
- { "GeForce GTX 980", { 1, 64, 1 } },
- { "GeForce GTX TITAN", { 1, 256, 1 } },
- { "GeForce GTX TITAN Black", { 1, 128, 2 } },
- { "GeForce GTX TITAN X", { 1, 512, 1 } },
- { "TITAN X (Pascal)", { 2, 512, 1 } },
- { "Tesla K20m", { 1, 128, 1 } },
- { "Tesla K40m", { 1, 128, 1 } },
- { "default", { 1, 256, 1 } },
- }
- },
- { // QUALCOMM GPUs
- kDeviceTypeGPU, "QUALCOMM", {
- { "QUALCOMM Adreno(TM)", { 1, 64, 1 } },
- { "default", { 1, 64, 1 } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { 1, 128, 1 } },
- }
- },
- }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry XaxpyDouble = {
- "Xaxpy", Precision::kDouble, {"VW", "WGS", "WPT"}, {
- { // AMD GPUs
- kDeviceTypeGPU, "AMD", {
- { "AMD Radeon R9 M370X Compute Engine", { 1, 256, 1 } },
- { "Ellesmere", { 2, 64, 4 } },
- { "Fiji", { 2, 64, 4 } },
- { "Hawaii", { 1, 64, 2 } },
- { "Oland", { 1, 64, 1 } },
- { "Pitcairn", { 1, 128, 1 } },
- { "Tahiti", { 1, 64, 1 } },
- { "Tonga", { 1, 128, 4 } },
- { "default", { 2, 64, 1 } },
- }
- },
- { // ARM GPUs
- kDeviceTypeGPU, "ARM", {
- { "Mali-T628", { 2, 128, 2 } },
- { "default", { 2, 128, 2 } },
- }
- },
- { // Intel CPUs
- kDeviceTypeCPU, "Intel", {
- { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 64, 1 } },
- { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1, 1024, 1 } },
- { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 2, 1024, 1 } },
- { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 8, 64, 1 } },
- { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 8, 256, 1 } },
- { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 8, 2048, 1 } },
- { "default", { 8, 64, 1 } },
- }
- },
- { // Intel accelerators
- kDeviceTypeAccelerator, "Intel", {
- { "Intel(R) Many Integrated Core Acceleration Card", { 2, 512, 1 } },
- { "default", { 2, 512, 1 } },
- }
- },
- { // NVIDIA GPUs
- kDeviceTypeGPU, "NVIDIA", {
- { "GRID K520", { 1, 64, 1 } },
- { "GeForce GTX 1070", { 1, 64, 8 } },
- { "GeForce GTX 1080", { 1, 128, 1 } },
- { "GeForce GTX 480", { 1, 128, 1 } },
- { "GeForce GTX 670", { 1, 64, 1 } },
- { "GeForce GTX 680", { 1, 64, 1 } },
- { "GeForce GTX 750", { 1, 128, 1 } },
- { "GeForce GTX 750 Ti", { 1, 256, 2 } },
- { "GeForce GTX 980", { 1, 256, 1 } },
- { "GeForce GTX TITAN", { 2, 1024, 1 } },
- { "GeForce GTX TITAN Black", { 2, 128, 1 } },
- { "GeForce GTX TITAN X", { 1, 512, 1 } },
- { "TITAN X (Pascal)", { 2, 512, 1 } },
- { "Tesla K20m", { 2, 128, 1 } },
- { "Tesla K40m", { 2, 128, 1 } },
- { "default", { 1, 128, 1 } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { 2, 256, 1 } },
- }
- },
- }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry XaxpyComplexDouble = {
- "Xaxpy", Precision::kComplexDouble, {"VW", "WGS", "WPT"}, {
- { // AMD GPUs
- kDeviceTypeGPU, "AMD", {
- { "AMD Radeon R9 M370X Compute Engine", { 1, 128, 1 } },
- { "Ellesmere", { 1, 128, 1 } },
- { "Fiji", { 1, 64, 1 } },
- { "Hawaii", { 2, 64, 1 } },
- { "Oland", { 1, 256, 1 } },
- { "Pitcairn", { 1, 128, 1 } },
- { "Tahiti", { 1, 128, 1 } },
- { "Tonga", { 1, 64, 1 } },
- { "default", { 1, 128, 1 } },
- }
- },
- { // ARM GPUs
- kDeviceTypeGPU, "ARM", {
- { "Mali-T628", { 1, 64, 8 } },
- { "default", { 1, 64, 8 } },
- }
- },
- { // Intel CPUs
- kDeviceTypeCPU, "Intel", {
- { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 1024, 1 } },
- { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 8, 128, 1 } },
- { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 8, 128, 1 } },
- { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 8, 512, 1 } },
- { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 8, 1024, 1 } },
- { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 1, 256, 1 } },
- { "default", { 8, 256, 1 } },
- }
- },
- { // Intel accelerators
- kDeviceTypeAccelerator, "Intel", {
- { "Intel(R) Many Integrated Core Acceleration Card", { 1, 1024, 1 } },
- { "default", { 1, 1024, 1 } },
- }
- },
- { // NVIDIA GPUs
- kDeviceTypeGPU, "NVIDIA", {
- { "GRID K520", { 1, 64, 1 } },
- { "GeForce GTX 1070", { 1, 64, 2 } },
- { "GeForce GTX 1080", { 1, 256, 1 } },
- { "GeForce GTX 480", { 1, 128, 1 } },
- { "GeForce GTX 670", { 1, 256, 1 } },
- { "GeForce GTX 680", { 1, 64, 1 } },
- { "GeForce GTX 750", { 1, 1024, 1 } },
- { "GeForce GTX 750 Ti", { 1, 64, 2 } },
- { "GeForce GTX 980", { 1, 1024, 1 } },
- { "GeForce GTX TITAN", { 1, 64, 4 } },
- { "GeForce GTX TITAN Black", { 1, 128, 4 } },
- { "GeForce GTX TITAN X", { 1, 1024, 1 } },
- { "TITAN X (Pascal)", { 1, 256, 2 } },
- { "Tesla K20m", { 1, 64, 1 } },
- { "Tesla K40m", { 1, 64, 1 } },
- { "default", { 1, 64, 1 } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { 1, 256, 1 } },
- }
- },
- }
-};
-
-// =================================================================================================
-} // namespace database
-} // namespace clblast
diff --git a/src/database/kernels/xaxpy/xaxpy.hpp b/src/database/kernels/xaxpy/xaxpy.hpp
new file mode 100644
index 00000000..aa920183
--- /dev/null
+++ b/src/database/kernels/xaxpy/xaxpy.hpp
@@ -0,0 +1,14 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Xaxpy' kernels.
+//
+// =================================================================================================
+
+#include "database/kernels/xaxpy/xaxpy_16.hpp"
+#include "database/kernels/xaxpy/xaxpy_32.hpp"
+#include "database/kernels/xaxpy/xaxpy_3232.hpp"
+#include "database/kernels/xaxpy/xaxpy_64.hpp"
+#include "database/kernels/xaxpy/xaxpy_6464.hpp"
diff --git a/src/database/kernels/xaxpy/xaxpy_16.hpp b/src/database/kernels/xaxpy/xaxpy_16.hpp
new file mode 100644
index 00000000..08f635f8
--- /dev/null
+++ b/src/database/kernels/xaxpy/xaxpy_16.hpp
@@ -0,0 +1,43 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Xaxpy16' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry XaxpyHalf = {
+ "Xaxpy", Precision::kHalf, {"VW", "WGS", "WPT"}, {
+ { // AMD GPUs
+ kDeviceTypeGPU, "AMD", {
+ { "Ellesmere", { 4, 128, 4 } },
+ { "default", { 4, 128, 4 } },
+ }
+ },
+ { // Intel GPUs
+ kDeviceTypeGPU, "Intel", {
+ { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 1, 64, 1 } },
+ { "Intel(R) HD Graphics Skylake ULT GT2", { 8, 64, 1 } },
+ { "default", { 8, 64, 1 } },
+ }
+ },
+ { // QUALCOMM GPUs
+ kDeviceTypeGPU, "QUALCOMM", {
+ { "QUALCOMM Adreno(TM)", { 8, 64, 1 } },
+ { "default", { 8, 64, 1 } },
+ }
+ },
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { 8, 64, 1 } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/database/kernels/xaxpy/xaxpy_32.hpp b/src/database/kernels/xaxpy/xaxpy_32.hpp
new file mode 100644
index 00000000..e5530a15
--- /dev/null
+++ b/src/database/kernels/xaxpy/xaxpy_32.hpp
@@ -0,0 +1,101 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Xaxpy32' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry XaxpySingle = {
+ "Xaxpy", Precision::kSingle, {"VW", "WGS", "WPT"}, {
+ { // AMD GPUs
+ kDeviceTypeGPU, "AMD", {
+ { "AMD Radeon R9 M370X Compute Engine", { 1, 128, 1 } },
+ { "ATI Radeon HD 6750M", { 1, 256, 2 } },
+ { "Ellesmere", { 1, 64, 4 } },
+ { "Fiji", { 4, 64, 1 } },
+ { "Hawaii", { 2, 64, 2 } },
+ { "Oland", { 1, 128, 1 } },
+ { "Pitcairn", { 2, 128, 1 } },
+ { "Tahiti", { 2, 64, 1 } },
+ { "Tonga", { 1, 256, 8 } },
+ { "Turks", { 2, 256, 1 } },
+ { "default", { 2, 256, 1 } },
+ }
+ },
+ { // ARM GPUs
+ kDeviceTypeGPU, "ARM", {
+ { "Mali-T628", { 4, 256, 1 } },
+ { "default", { 4, 256, 1 } },
+ }
+ },
+ { // Intel CPUs
+ kDeviceTypeCPU, "Intel", {
+ { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 8, 512, 1 } },
+ { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1, 512, 1 } },
+ { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 1, 128, 2 } },
+ { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 4, 256, 1 } },
+ { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 2, 1024, 1 } },
+ { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 1, 128, 1 } },
+ { "default", { 8, 512, 1 } },
+ }
+ },
+ { // Intel GPUs
+ kDeviceTypeGPU, "Intel", {
+ { "Intel(R) HD Graphics 530", { 1, 128, 1 } },
+ { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 1, 256, 1 } },
+ { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 1, 64, 1 } },
+ { "Intel(R) HD Graphics IvyBridge M GT2", { 1, 64, 1 } },
+ { "Intel(R) HD Graphics Skylake ULT GT2", { 8, 512, 1 } },
+ { "Iris", { 1, 64, 1 } },
+ { "Iris Pro", { 1, 128, 2 } },
+ { "default", { 4, 256, 1 } },
+ }
+ },
+ { // Intel accelerators
+ kDeviceTypeAccelerator, "Intel", {
+ { "Intel(R) Many Integrated Core Acceleration Card", { 2, 1024, 2 } },
+ { "default", { 2, 1024, 2 } },
+ }
+ },
+ { // NVIDIA GPUs
+ kDeviceTypeGPU, "NVIDIA", {
+ { "GRID K520", { 2, 64, 1 } },
+ { "GeForce GT 650M", { 2, 1024, 1 } },
+ { "GeForce GTX 1070", { 1, 64, 4 } },
+ { "GeForce GTX 1080", { 1, 256, 1 } },
+ { "GeForce GTX 480", { 2, 128, 1 } },
+ { "GeForce GTX 670", { 2, 64, 1 } },
+ { "GeForce GTX 680", { 1, 128, 1 } },
+ { "GeForce GTX 750", { 1, 64, 1 } },
+ { "GeForce GTX 750 Ti", { 2, 64, 1 } },
+ { "GeForce GTX 980", { 1, 1024, 1 } },
+ { "GeForce GTX TITAN", { 4, 256, 1 } },
+ { "GeForce GTX TITAN Black", { 4, 128, 4 } },
+ { "GeForce GTX TITAN X", { 1, 64, 1 } },
+ { "TITAN X (Pascal)", { 4, 128, 1 } },
+ { "Tesla K20m", { 4, 128, 1 } },
+ { "Tesla K40m", { 4, 128, 1 } },
+ { "default", { 4, 1024, 1 } },
+ }
+ },
+ { // QUALCOMM GPUs
+ kDeviceTypeGPU, "QUALCOMM", {
+ { "QUALCOMM Adreno(TM)", { 4, 128, 2 } },
+ { "default", { 4, 128, 2 } },
+ }
+ },
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { 4, 64, 1 } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/database/kernels/xaxpy/xaxpy_3232.hpp b/src/database/kernels/xaxpy/xaxpy_3232.hpp
new file mode 100644
index 00000000..f0a2d117
--- /dev/null
+++ b/src/database/kernels/xaxpy/xaxpy_3232.hpp
@@ -0,0 +1,100 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Xaxpy3232' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry XaxpyComplexSingle = {
+ "Xaxpy", Precision::kComplexSingle, {"VW", "WGS", "WPT"}, {
+ { // AMD GPUs
+ kDeviceTypeGPU, "AMD", {
+ { "AMD Radeon R9 M370X Compute Engine", { 2, 64, 8 } },
+ { "ATI Radeon HD 6750M", { 1, 64, 1 } },
+ { "Ellesmere", { 2, 256, 1 } },
+ { "Fiji", { 1, 128, 2 } },
+ { "Hawaii", { 1, 128, 2 } },
+ { "Oland", { 1, 128, 1 } },
+ { "Pitcairn", { 1, 64, 1 } },
+ { "Tahiti", { 1, 64, 1 } },
+ { "Tonga", { 1, 256, 8 } },
+ { "Turks", { 2, 256, 1 } },
+ { "default", { 1, 128, 1 } },
+ }
+ },
+ { // ARM GPUs
+ kDeviceTypeGPU, "ARM", {
+ { "Mali-T628", { 1, 256, 1 } },
+ { "default", { 1, 256, 1 } },
+ }
+ },
+ { // Intel CPUs
+ kDeviceTypeCPU, "Intel", {
+ { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 1024, 1 } },
+ { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 4, 256, 1 } },
+ { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 4, 1024, 1 } },
+ { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 1, 1024, 2 } },
+ { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 1024, 1 } },
+ { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 2, 1024, 1 } },
+ { "default", { 8, 1024, 1 } },
+ }
+ },
+ { // Intel GPUs
+ kDeviceTypeGPU, "Intel", {
+ { "Intel(R) HD Graphics 530", { 4, 64, 2 } },
+ { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 1, 64, 1 } },
+ { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 1, 64, 1 } },
+ { "Intel(R) HD Graphics IvyBridge M GT2", { 1, 64, 1 } },
+ { "Intel(R) HD Graphics Skylake ULT GT2", { 4, 64, 1 } },
+ { "Iris", { 2, 128, 1 } },
+ { "Iris Pro", { 1, 256, 8 } },
+ { "default", { 4, 64, 1 } },
+ }
+ },
+ { // Intel accelerators
+ kDeviceTypeAccelerator, "Intel", {
+ { "Intel(R) Many Integrated Core Acceleration Card", { 1, 1024, 1 } },
+ { "default", { 1, 1024, 1 } },
+ }
+ },
+ { // NVIDIA GPUs
+ kDeviceTypeGPU, "NVIDIA", {
+ { "GRID K520", { 1, 512, 1 } },
+ { "GeForce GTX 1070", { 1, 64, 2 } },
+ { "GeForce GTX 1080", { 2, 64, 1 } },
+ { "GeForce GTX 480", { 1, 256, 1 } },
+ { "GeForce GTX 670", { 1, 256, 1 } },
+ { "GeForce GTX 680", { 1, 256, 1 } },
+ { "GeForce GTX 750", { 1, 512, 1 } },
+ { "GeForce GTX 750 Ti", { 1, 512, 1 } },
+ { "GeForce GTX 980", { 1, 64, 1 } },
+ { "GeForce GTX TITAN", { 1, 256, 1 } },
+ { "GeForce GTX TITAN Black", { 1, 128, 2 } },
+ { "GeForce GTX TITAN X", { 1, 512, 1 } },
+ { "TITAN X (Pascal)", { 2, 512, 1 } },
+ { "Tesla K20m", { 1, 128, 1 } },
+ { "Tesla K40m", { 1, 128, 1 } },
+ { "default", { 1, 256, 1 } },
+ }
+ },
+ { // QUALCOMM GPUs
+ kDeviceTypeGPU, "QUALCOMM", {
+ { "QUALCOMM Adreno(TM)", { 1, 64, 1 } },
+ { "default", { 1, 64, 1 } },
+ }
+ },
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { 1, 128, 1 } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/database/kernels/xaxpy/xaxpy_64.hpp b/src/database/kernels/xaxpy/xaxpy_64.hpp
new file mode 100644
index 00000000..dc9ba62c
--- /dev/null
+++ b/src/database/kernels/xaxpy/xaxpy_64.hpp
@@ -0,0 +1,80 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Xaxpy64' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry XaxpyDouble = {
+ "Xaxpy", Precision::kDouble, {"VW", "WGS", "WPT"}, {
+ { // AMD GPUs
+ kDeviceTypeGPU, "AMD", {
+ { "AMD Radeon R9 M370X Compute Engine", { 1, 256, 1 } },
+ { "Ellesmere", { 2, 64, 4 } },
+ { "Fiji", { 2, 64, 4 } },
+ { "Hawaii", { 1, 64, 2 } },
+ { "Oland", { 1, 64, 1 } },
+ { "Pitcairn", { 1, 128, 1 } },
+ { "Tahiti", { 1, 64, 1 } },
+ { "Tonga", { 1, 128, 4 } },
+ { "default", { 2, 64, 1 } },
+ }
+ },
+ { // ARM GPUs
+ kDeviceTypeGPU, "ARM", {
+ { "Mali-T628", { 2, 128, 2 } },
+ { "default", { 2, 128, 2 } },
+ }
+ },
+ { // Intel CPUs
+ kDeviceTypeCPU, "Intel", {
+ { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 64, 1 } },
+ { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1, 1024, 1 } },
+ { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 2, 1024, 1 } },
+ { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 8, 64, 1 } },
+ { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 8, 256, 1 } },
+ { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 8, 2048, 1 } },
+ { "default", { 8, 64, 1 } },
+ }
+ },
+ { // Intel accelerators
+ kDeviceTypeAccelerator, "Intel", {
+ { "Intel(R) Many Integrated Core Acceleration Card", { 2, 512, 1 } },
+ { "default", { 2, 512, 1 } },
+ }
+ },
+ { // NVIDIA GPUs
+ kDeviceTypeGPU, "NVIDIA", {
+ { "GRID K520", { 1, 64, 1 } },
+ { "GeForce GTX 1070", { 1, 64, 8 } },
+ { "GeForce GTX 1080", { 1, 128, 1 } },
+ { "GeForce GTX 480", { 1, 128, 1 } },
+ { "GeForce GTX 670", { 1, 64, 1 } },
+ { "GeForce GTX 680", { 1, 64, 1 } },
+ { "GeForce GTX 750", { 1, 128, 1 } },
+ { "GeForce GTX 750 Ti", { 1, 256, 2 } },
+ { "GeForce GTX 980", { 1, 256, 1 } },
+ { "GeForce GTX TITAN", { 2, 1024, 1 } },
+ { "GeForce GTX TITAN Black", { 2, 128, 1 } },
+ { "GeForce GTX TITAN X", { 1, 512, 1 } },
+ { "TITAN X (Pascal)", { 2, 512, 1 } },
+ { "Tesla K20m", { 2, 128, 1 } },
+ { "Tesla K40m", { 2, 128, 1 } },
+ { "default", { 1, 128, 1 } },
+ }
+ },
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { 2, 256, 1 } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/database/kernels/xaxpy/xaxpy_6464.hpp b/src/database/kernels/xaxpy/xaxpy_6464.hpp
new file mode 100644
index 00000000..0b5f5bcf
--- /dev/null
+++ b/src/database/kernels/xaxpy/xaxpy_6464.hpp
@@ -0,0 +1,80 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Xaxpy6464' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry XaxpyComplexDouble = {
+ "Xaxpy", Precision::kComplexDouble, {"VW", "WGS", "WPT"}, {
+ { // AMD GPUs
+ kDeviceTypeGPU, "AMD", {
+ { "AMD Radeon R9 M370X Compute Engine", { 1, 128, 1 } },
+ { "Ellesmere", { 1, 128, 1 } },
+ { "Fiji", { 1, 64, 1 } },
+ { "Hawaii", { 2, 64, 1 } },
+ { "Oland", { 1, 256, 1 } },
+ { "Pitcairn", { 1, 128, 1 } },
+ { "Tahiti", { 1, 128, 1 } },
+ { "Tonga", { 1, 64, 1 } },
+ { "default", { 1, 128, 1 } },
+ }
+ },
+ { // ARM GPUs
+ kDeviceTypeGPU, "ARM", {
+ { "Mali-T628", { 1, 64, 8 } },
+ { "default", { 1, 64, 8 } },
+ }
+ },
+ { // Intel CPUs
+ kDeviceTypeCPU, "Intel", {
+ { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 1024, 1 } },
+ { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 8, 128, 1 } },
+ { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 8, 128, 1 } },
+ { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 8, 512, 1 } },
+ { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 8, 1024, 1 } },
+ { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 1, 256, 1 } },
+ { "default", { 8, 256, 1 } },
+ }
+ },
+ { // Intel accelerators
+ kDeviceTypeAccelerator, "Intel", {
+ { "Intel(R) Many Integrated Core Acceleration Card", { 1, 1024, 1 } },
+ { "default", { 1, 1024, 1 } },
+ }
+ },
+ { // NVIDIA GPUs
+ kDeviceTypeGPU, "NVIDIA", {
+ { "GRID K520", { 1, 64, 1 } },
+ { "GeForce GTX 1070", { 1, 64, 2 } },
+ { "GeForce GTX 1080", { 1, 256, 1 } },
+ { "GeForce GTX 480", { 1, 128, 1 } },
+ { "GeForce GTX 670", { 1, 256, 1 } },
+ { "GeForce GTX 680", { 1, 64, 1 } },
+ { "GeForce GTX 750", { 1, 1024, 1 } },
+ { "GeForce GTX 750 Ti", { 1, 64, 2 } },
+ { "GeForce GTX 980", { 1, 1024, 1 } },
+ { "GeForce GTX TITAN", { 1, 64, 4 } },
+ { "GeForce GTX TITAN Black", { 1, 128, 4 } },
+ { "GeForce GTX TITAN X", { 1, 1024, 1 } },
+ { "TITAN X (Pascal)", { 1, 256, 2 } },
+ { "Tesla K20m", { 1, 64, 1 } },
+ { "Tesla K40m", { 1, 64, 1 } },
+ { "default", { 1, 64, 1 } },
+ }
+ },
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { 1, 256, 1 } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/database/kernels/xdot.hpp b/src/database/kernels/xdot.hpp
deleted file mode 100644
index 986c32b2..00000000
--- a/src/database/kernels/xdot.hpp
+++ /dev/null
@@ -1,292 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-// Database generator <database.py>
-//
-// This file populates the database with best-found tuning parameters for the 'Xdot' kernels.
-//
-// =================================================================================================
-
-namespace clblast {
-namespace database {
-// =================================================================================================
-
-const Database::DatabaseEntry XdotHalf = {
- "Xdot", Precision::kHalf, {"WGS1", "WGS2"}, {
- { // AMD GPUs
- kDeviceTypeGPU, "AMD", {
- { "Ellesmere", { 256, 64 } },
- { "default", { 256, 64 } },
- }
- },
- { // Intel GPUs
- kDeviceTypeGPU, "Intel", {
- { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 256, 32 } },
- { "Intel(R) HD Graphics Skylake ULT GT2", { 128, 32 } },
- { "default", { 128, 32 } },
- }
- },
- { // QUALCOMM GPUs
- kDeviceTypeGPU, "QUALCOMM", {
- { "QUALCOMM Adreno(TM)", { 64, 64 } },
- { "default", { 64, 64 } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { 128, 64 } },
- }
- },
- }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry XdotSingle = {
- "Xdot", Precision::kSingle, {"WGS1", "WGS2"}, {
- { // AMD GPUs
- kDeviceTypeGPU, "AMD", {
- { "AMD Radeon R9 M370X Compute Engine", { 128, 32 } },
- { "ATI Radeon HD 6750M", { 256, 32 } },
- { "Ellesmere", { 128, 32 } },
- { "Fiji", { 256, 32 } },
- { "Oland", { 256, 32 } },
- { "Pitcairn", { 128, 32 } },
- { "Tahiti", { 128, 32 } },
- { "Tonga", { 64, 32 } },
- { "Turks", { 128, 64 } },
- { "default", { 256, 32 } },
- }
- },
- { // Intel CPUs
- kDeviceTypeCPU, "Intel", {
- { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 32 } },
- { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1024, 32 } },
- { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 64, 128 } },
- { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 64, 32 } },
- { "default", { 64, 64 } },
- }
- },
- { // Intel GPUs
- kDeviceTypeGPU, "Intel", {
- { "Intel(R) HD Graphics 530", { 64, 32 } },
- { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 256, 32 } },
- { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 64, 32 } },
- { "Intel(R) HD Graphics IvyBridge M GT2", { 512, 128 } },
- { "Intel(R) HD Graphics Skylake ULT GT2", { 32, 32 } },
- { "Iris Pro", { 512, 64 } },
- { "default", { 64, 32 } },
- }
- },
- { // NVIDIA GPUs
- kDeviceTypeGPU, "NVIDIA", {
- { "GRID K520", { 128, 32 } },
- { "GeForce GT 650M", { 128, 64 } },
- { "GeForce GTX 1070", { 128, 1024 } },
- { "GeForce GTX 1080", { 512, 64 } },
- { "GeForce GTX 480", { 512, 32 } },
- { "GeForce GTX 670", { 512, 1024 } },
- { "GeForce GTX 680", { 128, 128 } },
- { "GeForce GTX 750", { 128, 32 } },
- { "GeForce GTX 750 Ti", { 64, 32 } },
- { "GeForce GTX 980", { 256, 32 } },
- { "GeForce GTX TITAN Black", { 512, 64 } },
- { "GeForce GTX TITAN X", { 256, 32 } },
- { "TITAN X (Pascal)", { 1024, 32 } },
- { "Tesla K20m", { 1024, 32 } },
- { "default", { 256, 64 } },
- }
- },
- { // QUALCOMM GPUs
- kDeviceTypeGPU, "QUALCOMM", {
- { "QUALCOMM Adreno(TM)", { 128, 64 } },
- { "default", { 128, 64 } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { 128, 32 } },
- }
- },
- }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry XdotComplexSingle = {
- "Xdot", Precision::kComplexSingle, {"WGS1", "WGS2"}, {
- { // AMD GPUs
- kDeviceTypeGPU, "AMD", {
- { "AMD Radeon R9 M370X Compute Engine", { 64, 32 } },
- { "ATI Radeon HD 6750M", { 256, 256 } },
- { "Ellesmere", { 256, 32 } },
- { "Fiji", { 256, 64 } },
- { "Oland", { 128, 32 } },
- { "Pitcairn", { 256, 32 } },
- { "Tahiti", { 64, 32 } },
- { "Tonga", { 256, 64 } },
- { "Turks", { 128, 32 } },
- { "default", { 256, 32 } },
- }
- },
- { // Intel CPUs
- kDeviceTypeCPU, "Intel", {
- { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 128, 64 } },
- { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1024, 32 } },
- { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 64, 32 } },
- { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 256, 32 } },
- { "default", { 256, 32 } },
- }
- },
- { // Intel GPUs
- kDeviceTypeGPU, "Intel", {
- { "Intel(R) HD Graphics 530", { 256, 32 } },
- { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 256, 32 } },
- { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 32, 32 } },
- { "Intel(R) HD Graphics IvyBridge M GT2", { 512, 32 } },
- { "Intel(R) HD Graphics Skylake ULT GT2", { 32, 256 } },
- { "Iris Pro", { 32, 32 } },
- { "default", { 32, 32 } },
- }
- },
- { // NVIDIA GPUs
- kDeviceTypeGPU, "NVIDIA", {
- { "GRID K520", { 64, 32 } },
- { "GeForce GTX 1070", { 128, 32 } },
- { "GeForce GTX 1080", { 128, 64 } },
- { "GeForce GTX 480", { 512, 32 } },
- { "GeForce GTX 670", { 256, 32 } },
- { "GeForce GTX 680", { 128, 64 } },
- { "GeForce GTX 750", { 64, 32 } },
- { "GeForce GTX 750 Ti", { 64, 32 } },
- { "GeForce GTX 980", { 256, 64 } },
- { "GeForce GTX TITAN Black", { 128, 64 } },
- { "GeForce GTX TITAN X", { 256, 32 } },
- { "TITAN X (Pascal)", { 256, 32 } },
- { "Tesla K20m", { 512, 32 } },
- { "default", { 512, 64 } },
- }
- },
- { // QUALCOMM GPUs
- kDeviceTypeGPU, "QUALCOMM", {
- { "QUALCOMM Adreno(TM)", { 64, 256 } },
- { "default", { 64, 256 } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { 256, 32 } },
- }
- },
- }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry XdotDouble = {
- "Xdot", Precision::kDouble, {"WGS1", "WGS2"}, {
- { // AMD GPUs
- kDeviceTypeGPU, "AMD", {
- { "AMD Radeon R9 M370X Compute Engine", { 64, 128 } },
- { "Ellesmere", { 128, 64 } },
- { "Fiji", { 256, 32 } },
- { "Oland", { 256, 32 } },
- { "Pitcairn", { 128, 32 } },
- { "Tahiti", { 256, 32 } },
- { "Tonga", { 128, 64 } },
- { "default", { 128, 64 } },
- }
- },
- { // Intel CPUs
- kDeviceTypeCPU, "Intel", {
- { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 64, 128 } },
- { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 512, 64 } },
- { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 64, 64 } },
- { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 256, 32 } },
- { "default", { 256, 64 } },
- }
- },
- { // NVIDIA GPUs
- kDeviceTypeGPU, "NVIDIA", {
- { "GRID K520", { 128, 32 } },
- { "GeForce GTX 1070", { 128, 512 } },
- { "GeForce GTX 1080", { 128, 128 } },
- { "GeForce GTX 480", { 512, 32 } },
- { "GeForce GTX 670", { 256, 32 } },
- { "GeForce GTX 680", { 128, 64 } },
- { "GeForce GTX 750", { 64, 256 } },
- { "GeForce GTX 750 Ti", { 128, 64 } },
- { "GeForce GTX 980", { 128, 32 } },
- { "GeForce GTX TITAN Black", { 128, 64 } },
- { "GeForce GTX TITAN X", { 256, 32 } },
- { "TITAN X (Pascal)", { 128, 32 } },
- { "Tesla K20m", { 512, 32 } },
- { "default", { 128, 128 } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { 128, 64 } },
- }
- },
- }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry XdotComplexDouble = {
- "Xdot", Precision::kComplexDouble, {"WGS1", "WGS2"}, {
- { // AMD GPUs
- kDeviceTypeGPU, "AMD", {
- { "AMD Radeon R9 M370X Compute Engine", { 64, 32 } },
- { "Ellesmere", { 256, 32 } },
- { "Fiji", { 256, 32 } },
- { "Oland", { 256, 32 } },
- { "Pitcairn", { 256, 32 } },
- { "Tahiti", { 256, 32 } },
- { "Tonga", { 128, 64 } },
- { "default", { 256, 32 } },
- }
- },
- { // Intel CPUs
- kDeviceTypeCPU, "Intel", {
- { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 128 } },
- { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1024, 32 } },
- { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 1024, 32 } },
- { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 32 } },
- { "default", { 128, 32 } },
- }
- },
- { // NVIDIA GPUs
- kDeviceTypeGPU, "NVIDIA", {
- { "GRID K520", { 64, 32 } },
- { "GeForce GTX 1070", { 128, 64 } },
- { "GeForce GTX 1080", { 128, 32 } },
- { "GeForce GTX 480", { 512, 32 } },
- { "GeForce GTX 670", { 512, 128 } },
- { "GeForce GTX 680", { 256, 64 } },
- { "GeForce GTX 750", { 256, 32 } },
- { "GeForce GTX 750 Ti", { 64, 32 } },
- { "GeForce GTX 980", { 64, 32 } },
- { "GeForce GTX TITAN Black", { 128, 32 } },
- { "GeForce GTX TITAN X", { 128, 32 } },
- { "TITAN X (Pascal)", { 128, 64 } },
- { "Tesla K20m", { 128, 32 } },
- { "default", { 128, 64 } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { 256, 32 } },
- }
- },
- }
-};
-
-// =================================================================================================
-} // namespace database
-} // namespace clblast
diff --git a/src/database/kernels/xdot/xdot.hpp b/src/database/kernels/xdot/xdot.hpp
new file mode 100644
index 00000000..5d54cdc9
--- /dev/null
+++ b/src/database/kernels/xdot/xdot.hpp
@@ -0,0 +1,14 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Xdot' kernels.
+//
+// =================================================================================================
+
+#include "database/kernels/xdot/xdot_16.hpp"
+#include "database/kernels/xdot/xdot_32.hpp"
+#include "database/kernels/xdot/xdot_3232.hpp"
+#include "database/kernels/xdot/xdot_64.hpp"
+#include "database/kernels/xdot/xdot_6464.hpp"
diff --git a/src/database/kernels/xdot/xdot_16.hpp b/src/database/kernels/xdot/xdot_16.hpp
new file mode 100644
index 00000000..570604ec
--- /dev/null
+++ b/src/database/kernels/xdot/xdot_16.hpp
@@ -0,0 +1,43 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Xdot16' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry XdotHalf = {
+ "Xdot", Precision::kHalf, {"WGS1", "WGS2"}, {
+ { // AMD GPUs
+ kDeviceTypeGPU, "AMD", {
+ { "Ellesmere", { 256, 64 } },
+ { "default", { 256, 64 } },
+ }
+ },
+ { // Intel GPUs
+ kDeviceTypeGPU, "Intel", {
+ { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 256, 32 } },
+ { "Intel(R) HD Graphics Skylake ULT GT2", { 128, 32 } },
+ { "default", { 128, 32 } },
+ }
+ },
+ { // QUALCOMM GPUs
+ kDeviceTypeGPU, "QUALCOMM", {
+ { "QUALCOMM Adreno(TM)", { 64, 64 } },
+ { "default", { 64, 64 } },
+ }
+ },
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { 128, 64 } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/database/kernels/xdot/xdot_32.hpp b/src/database/kernels/xdot/xdot_32.hpp
new file mode 100644
index 00000000..8a7bd9d8
--- /dev/null
+++ b/src/database/kernels/xdot/xdot_32.hpp
@@ -0,0 +1,83 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Xdot32' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry XdotSingle = {
+ "Xdot", Precision::kSingle, {"WGS1", "WGS2"}, {
+ { // AMD GPUs
+ kDeviceTypeGPU, "AMD", {
+ { "AMD Radeon R9 M370X Compute Engine", { 128, 32 } },
+ { "ATI Radeon HD 6750M", { 256, 32 } },
+ { "Ellesmere", { 128, 32 } },
+ { "Fiji", { 256, 32 } },
+ { "Oland", { 256, 32 } },
+ { "Pitcairn", { 128, 32 } },
+ { "Tahiti", { 128, 32 } },
+ { "Tonga", { 64, 32 } },
+ { "Turks", { 128, 64 } },
+ { "default", { 256, 32 } },
+ }
+ },
+ { // Intel CPUs
+ kDeviceTypeCPU, "Intel", {
+ { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 32 } },
+ { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1024, 32 } },
+ { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 64, 128 } },
+ { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 64, 32 } },
+ { "default", { 64, 64 } },
+ }
+ },
+ { // Intel GPUs
+ kDeviceTypeGPU, "Intel", {
+ { "Intel(R) HD Graphics 530", { 64, 32 } },
+ { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 256, 32 } },
+ { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 64, 32 } },
+ { "Intel(R) HD Graphics IvyBridge M GT2", { 512, 128 } },
+ { "Intel(R) HD Graphics Skylake ULT GT2", { 32, 32 } },
+ { "Iris Pro", { 512, 64 } },
+ { "default", { 64, 32 } },
+ }
+ },
+ { // NVIDIA GPUs
+ kDeviceTypeGPU, "NVIDIA", {
+ { "GRID K520", { 128, 32 } },
+ { "GeForce GT 650M", { 128, 64 } },
+ { "GeForce GTX 1070", { 128, 1024 } },
+ { "GeForce GTX 1080", { 512, 64 } },
+ { "GeForce GTX 480", { 512, 32 } },
+ { "GeForce GTX 670", { 512, 1024 } },
+ { "GeForce GTX 680", { 128, 128 } },
+ { "GeForce GTX 750", { 128, 32 } },
+ { "GeForce GTX 750 Ti", { 64, 32 } },
+ { "GeForce GTX 980", { 256, 32 } },
+ { "GeForce GTX TITAN Black", { 512, 64 } },
+ { "GeForce GTX TITAN X", { 256, 32 } },
+ { "TITAN X (Pascal)", { 1024, 32 } },
+ { "Tesla K20m", { 1024, 32 } },
+ { "default", { 256, 64 } },
+ }
+ },
+ { // QUALCOMM GPUs
+ kDeviceTypeGPU, "QUALCOMM", {
+ { "QUALCOMM Adreno(TM)", { 128, 64 } },
+ { "default", { 128, 64 } },
+ }
+ },
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { 128, 32 } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/database/kernels/xdot/xdot_3232.hpp b/src/database/kernels/xdot/xdot_3232.hpp
new file mode 100644
index 00000000..4950c1f2
--- /dev/null
+++ b/src/database/kernels/xdot/xdot_3232.hpp
@@ -0,0 +1,82 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Xdot3232' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry XdotComplexSingle = {
+ "Xdot", Precision::kComplexSingle, {"WGS1", "WGS2"}, {
+ { // AMD GPUs
+ kDeviceTypeGPU, "AMD", {
+ { "AMD Radeon R9 M370X Compute Engine", { 64, 32 } },
+ { "ATI Radeon HD 6750M", { 256, 256 } },
+ { "Ellesmere", { 256, 32 } },
+ { "Fiji", { 256, 64 } },
+ { "Oland", { 128, 32 } },
+ { "Pitcairn", { 256, 32 } },
+ { "Tahiti", { 64, 32 } },
+ { "Tonga", { 256, 64 } },
+ { "Turks", { 128, 32 } },
+ { "default", { 256, 32 } },
+ }
+ },
+ { // Intel CPUs
+ kDeviceTypeCPU, "Intel", {
+ { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 128, 64 } },
+ { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1024, 32 } },
+ { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 64, 32 } },
+ { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 256, 32 } },
+ { "default", { 256, 32 } },
+ }
+ },
+ { // Intel GPUs
+ kDeviceTypeGPU, "Intel", {
+ { "Intel(R) HD Graphics 530", { 256, 32 } },
+ { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 256, 32 } },
+ { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 32, 32 } },
+ { "Intel(R) HD Graphics IvyBridge M GT2", { 512, 32 } },
+ { "Intel(R) HD Graphics Skylake ULT GT2", { 32, 256 } },
+ { "Iris Pro", { 32, 32 } },
+ { "default", { 32, 32 } },
+ }
+ },
+ { // NVIDIA GPUs
+ kDeviceTypeGPU, "NVIDIA", {
+ { "GRID K520", { 64, 32 } },
+ { "GeForce GTX 1070", { 128, 32 } },
+ { "GeForce GTX 1080", { 128, 64 } },
+ { "GeForce GTX 480", { 512, 32 } },
+ { "GeForce GTX 670", { 256, 32 } },
+ { "GeForce GTX 680", { 128, 64 } },
+ { "GeForce GTX 750", { 64, 32 } },
+ { "GeForce GTX 750 Ti", { 64, 32 } },
+ { "GeForce GTX 980", { 256, 64 } },
+ { "GeForce GTX TITAN Black", { 128, 64 } },
+ { "GeForce GTX TITAN X", { 256, 32 } },
+ { "TITAN X (Pascal)", { 256, 32 } },
+ { "Tesla K20m", { 512, 32 } },
+ { "default", { 512, 64 } },
+ }
+ },
+ { // QUALCOMM GPUs
+ kDeviceTypeGPU, "QUALCOMM", {
+ { "QUALCOMM Adreno(TM)", { 64, 256 } },
+ { "default", { 64, 256 } },
+ }
+ },
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { 256, 32 } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/database/kernels/xdot/xdot_64.hpp b/src/database/kernels/xdot/xdot_64.hpp
new file mode 100644
index 00000000..7fbcb474
--- /dev/null
+++ b/src/database/kernels/xdot/xdot_64.hpp
@@ -0,0 +1,63 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Xdot64' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry XdotDouble = {
+ "Xdot", Precision::kDouble, {"WGS1", "WGS2"}, {
+ { // AMD GPUs
+ kDeviceTypeGPU, "AMD", {
+ { "AMD Radeon R9 M370X Compute Engine", { 64, 128 } },
+ { "Ellesmere", { 128, 64 } },
+ { "Fiji", { 256, 32 } },
+ { "Oland", { 256, 32 } },
+ { "Pitcairn", { 128, 32 } },
+ { "Tahiti", { 256, 32 } },
+ { "Tonga", { 128, 64 } },
+ { "default", { 128, 64 } },
+ }
+ },
+ { // Intel CPUs
+ kDeviceTypeCPU, "Intel", {
+ { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 64, 128 } },
+ { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 512, 64 } },
+ { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 64, 64 } },
+ { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 256, 32 } },
+ { "default", { 256, 64 } },
+ }
+ },
+ { // NVIDIA GPUs
+ kDeviceTypeGPU, "NVIDIA", {
+ { "GRID K520", { 128, 32 } },
+ { "GeForce GTX 1070", { 128, 512 } },
+ { "GeForce GTX 1080", { 128, 128 } },
+ { "GeForce GTX 480", { 512, 32 } },
+ { "GeForce GTX 670", { 256, 32 } },
+ { "GeForce GTX 680", { 128, 64 } },
+ { "GeForce GTX 750", { 64, 256 } },
+ { "GeForce GTX 750 Ti", { 128, 64 } },
+ { "GeForce GTX 980", { 128, 32 } },
+ { "GeForce GTX TITAN Black", { 128, 64 } },
+ { "GeForce GTX TITAN X", { 256, 32 } },
+ { "TITAN X (Pascal)", { 128, 32 } },
+ { "Tesla K20m", { 512, 32 } },
+ { "default", { 128, 128 } },
+ }
+ },
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { 128, 64 } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/database/kernels/xdot/xdot_6464.hpp b/src/database/kernels/xdot/xdot_6464.hpp
new file mode 100644
index 00000000..ea23b329
--- /dev/null
+++ b/src/database/kernels/xdot/xdot_6464.hpp
@@ -0,0 +1,63 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Xdot6464' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry XdotComplexDouble = {
+ "Xdot", Precision::kComplexDouble, {"WGS1", "WGS2"}, {
+ { // AMD GPUs
+ kDeviceTypeGPU, "AMD", {
+ { "AMD Radeon R9 M370X Compute Engine", { 64, 32 } },
+ { "Ellesmere", { 256, 32 } },
+ { "Fiji", { 256, 32 } },
+ { "Oland", { 256, 32 } },
+ { "Pitcairn", { 256, 32 } },
+ { "Tahiti", { 256, 32 } },
+ { "Tonga", { 128, 64 } },
+ { "default", { 256, 32 } },
+ }
+ },
+ { // Intel CPUs
+ kDeviceTypeCPU, "Intel", {
+ { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 128 } },
+ { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1024, 32 } },
+ { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 1024, 32 } },
+ { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 32 } },
+ { "default", { 128, 32 } },
+ }
+ },
+ { // NVIDIA GPUs
+ kDeviceTypeGPU, "NVIDIA", {
+ { "GRID K520", { 64, 32 } },
+ { "GeForce GTX 1070", { 128, 64 } },
+ { "GeForce GTX 1080", { 128, 32 } },
+ { "GeForce GTX 480", { 512, 32 } },
+ { "GeForce GTX 670", { 512, 128 } },
+ { "GeForce GTX 680", { 256, 64 } },
+ { "GeForce GTX 750", { 256, 32 } },
+ { "GeForce GTX 750 Ti", { 64, 32 } },
+ { "GeForce GTX 980", { 64, 32 } },
+ { "GeForce GTX TITAN Black", { 128, 32 } },
+ { "GeForce GTX TITAN X", { 128, 32 } },
+ { "TITAN X (Pascal)", { 128, 64 } },
+ { "Tesla K20m", { 128, 32 } },
+ { "default", { 128, 64 } },
+ }
+ },
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { 256, 32 } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/database/kernels/xgemm.hpp b/src/database/kernels/xgemm.hpp
deleted file mode 100644
index 43854afb..00000000
--- a/src/database/kernels/xgemm.hpp
+++ /dev/null
@@ -1,348 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-// Database generator <database.py>
-//
-// This file populates the database with best-found tuning parameters for the 'Xgemm' kernels.
-//
-// =================================================================================================
-
-namespace clblast {
-namespace database {
-// =================================================================================================
-
-const Database::DatabaseEntry XgemmHalf = {
- "Xgemm", Precision::kHalf, {"KWG", "KWI", "MDIMA", "MDIMC", "MWG", "NDIMB", "NDIMC", "NWG", "SA", "SB", "STRM", "STRN", "VWM", "VWN"}, {
- { // AMD GPUs
- kDeviceTypeGPU, "AMD", {
- { "Ellesmere", { 32, 2, 8, 8, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } },
- { "default", { 32, 2, 8, 8, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } },
- }
- },
- { // Intel GPUs
- kDeviceTypeGPU, "Intel", {
- { "Intel(R) HD Graphics Skylake ULT GT2", { 32, 2, 8, 8, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } },
- { "default", { 32, 2, 8, 8, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { 32, 2, 8, 8, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } },
- }
- },
- }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry XgemmSingle = {
- "Xgemm", Precision::kSingle, {"KWG", "KWI", "MDIMA", "MDIMC", "MWG", "NDIMB", "NDIMC", "NWG", "SA", "SB", "STRM", "STRN", "VWM", "VWN"}, {
- { // AMD GPUs
- kDeviceTypeGPU, "AMD", {
- { "AMD Radeon R9 M370X Compute Engine", { 32, 2, 16, 16, 64, 8, 16, 128, 0, 0, 0, 0, 2, 8 } },
- { "ATI Radeon HD 6750M", { 32, 2, 8, 16, 128, 8, 8, 128, 0, 0, 1, 1, 8, 8 } },
- { "Ellesmere", { 32, 2, 8, 8, 16, 16, 16, 64, 1, 1, 0, 0, 1, 2 } },
- { "Fiji", { 32, 2, 16, 16, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } },
- { "Hawaii", { 16, 2, 16, 32, 128, 32, 8, 64, 1, 1, 1, 1, 4, 2 } },
- { "Oland", { 16, 2, 32, 16, 64, 32, 16, 128, 1, 1, 1, 0, 2, 4 } },
- { "Pitcairn", { 16, 2, 16, 8, 32, 16, 16, 128, 0, 0, 1, 0, 1, 1 } },
- { "Tahiti", { 32, 2, 16, 32, 128, 16, 8, 64, 0, 0, 0, 0, 4, 1 } },
- { "Tonga", { 16, 2, 16, 32, 64, 16, 8, 128, 1, 1, 0, 0, 2, 8 } },
- { "Turks", { 32, 2, 8, 8, 64, 8, 8, 64, 0, 0, 0, 0, 4, 4 } },
- { "default", { 32, 2, 8, 8, 32, 8, 8, 64, 0, 0, 0, 0, 4, 4 } },
- }
- },
- { // ARM GPUs
- kDeviceTypeGPU, "ARM", {
- { "Mali-T628", { 16, 2, 8, 8, 64, 8, 16, 16, 0, 0, 1, 1, 8, 1 } },
- { "default", { 16, 2, 8, 8, 64, 8, 16, 16, 0, 0, 1, 1, 8, 1 } },
- }
- },
- { // Intel CPUs
- kDeviceTypeCPU, "Intel", {
- { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 16, 2, 8, 8, 128, 16, 8, 128, 0, 1, 1, 1, 1, 8 } },
- { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 8, 32, 32, 64, 32, 16, 64, 1, 1, 1, 0, 2, 2 } },
- { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 2, 16, 8, 128, 16, 8, 64, 0, 0, 1, 0, 1, 2 } },
- { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 2, 32, 8, 128, 8, 8, 128, 1, 1, 1, 1, 2, 8 } },
- { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 16, 2, 8, 8, 128, 8, 8, 128, 1, 1, 1, 0, 1, 8 } },
- { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 8, 16, 16, 64, 32, 32, 64, 0, 1, 1, 0, 1, 2 } },
- { "default", { 32, 2, 8, 8, 32, 8, 8, 64, 1, 1, 0, 0, 4, 4 } },
- }
- },
- { // Intel GPUs
- kDeviceTypeGPU, "Intel", {
- { "Intel(R) HD Graphics 530", { 32, 2, 8, 8, 128, 32, 16, 64, 0, 0, 1, 0, 4, 2 } },
- { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 32, 8, 8, 8, 64, 32, 16, 64, 1, 1, 1, 1, 4, 2 } },
- { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 16, 2, 16, 8, 32, 8, 16, 128, 1, 1, 1, 1, 2, 4 } },
- { "Intel(R) HD Graphics IvyBridge M GT2", { 32, 2, 16, 16, 64, 16, 8, 64, 1, 1, 1, 0, 2, 4 } },
- { "Intel(R) HD Graphics Skylake ULT GT2", { 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 4, 4 } },
- { "Iris", { 16, 8, 16, 8, 128, 32, 16, 64, 1, 1, 1, 1, 4, 1 } },
- { "Iris Pro", { 16, 2, 16, 8, 64, 32, 32, 128, 1, 1, 1, 0, 4, 4 } },
- { "default", { 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 4, 4 } },
- }
- },
- { // Intel accelerators
- kDeviceTypeAccelerator, "Intel", {
- { "Intel(R) Many Integrated Core Acceleration Card", { 32, 2, 32, 32, 32, 32, 8, 128, 0, 0, 1, 0, 1, 4 } },
- { "default", { 32, 2, 32, 32, 32, 32, 8, 128, 0, 0, 1, 0, 1, 4 } },
- }
- },
- { // NVIDIA GPUs
- kDeviceTypeGPU, "NVIDIA", {
- { "GRID K520", { 16, 2, 16, 8, 32, 8, 16, 64, 1, 1, 1, 1, 2, 4 } },
- { "GeForce GT 650M", { 32, 2, 8, 8, 32, 32, 32, 64, 1, 1, 0, 0, 4, 2 } },
- { "GeForce GTX 1070", { 16, 2, 32, 16, 128, 32, 8, 128, 1, 1, 1, 0, 4, 1 } },
- { "GeForce GTX 1080", { 32, 2, 16, 8, 64, 8, 8, 64, 1, 1, 1, 1, 4, 8 } },
- { "GeForce GTX 480", { 16, 2, 16, 8, 64, 32, 16, 64, 1, 1, 1, 1, 2, 2 } },
- { "GeForce GTX 670", { 16, 2, 8, 8, 64, 16, 16, 64, 1, 1, 1, 0, 2, 4 } },
- { "GeForce GTX 680", { 32, 8, 8, 16, 64, 32, 16, 128, 1, 1, 0, 0, 4, 2 } },
- { "GeForce GTX 750", { 16, 2, 16, 16, 64, 32, 8, 128, 1, 1, 1, 1, 1, 2 } },
- { "GeForce GTX 750 Ti", { 16, 2, 16, 16, 128, 32, 8, 64, 1, 1, 0, 1, 8, 2 } },
- { "GeForce GTX 980", { 16, 2, 16, 16, 64, 16, 8, 128, 1, 1, 1, 0, 4, 8 } },
- { "GeForce GTX TITAN", { 16, 8, 32, 16, 64, 8, 8, 64, 1, 1, 1, 0, 2, 2 } },
- { "GeForce GTX TITAN Black", { 16, 2, 16, 8, 64, 16, 16, 64, 1, 1, 1, 0, 4, 1 } },
- { "GeForce GTX TITAN X", { 16, 2, 8, 16, 128, 8, 8, 128, 1, 1, 1, 1, 4, 8 } },
- { "TITAN X (Pascal)", { 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 4, 1 } },
- { "Tesla K20m", { 16, 2, 32, 16, 64, 16, 8, 64, 1, 1, 1, 0, 2, 4 } },
- { "Tesla K40m", { 16, 8, 16, 8, 64, 16, 16, 128, 1, 1, 1, 0, 2, 4 } },
- { "default", { 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 4, 2 } },
- }
- },
- { // QUALCOMM GPUs
- kDeviceTypeGPU, "QUALCOMM", {
- { "QUALCOMM Adreno(TM)", { 32, 2, 8, 8, 32, 8, 8, 32, 1, 1, 0, 0, 4, 1 } },
- { "default", { 32, 2, 8, 8, 32, 8, 8, 32, 1, 1, 0, 0, 4, 1 } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { 32, 2, 8, 8, 32, 8, 8, 32, 1, 1, 0, 0, 4, 2 } },
- }
- },
- }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry XgemmComplexSingle = {
- "Xgemm", Precision::kComplexSingle, {"KWG", "KWI", "MDIMA", "MDIMC", "MWG", "NDIMB", "NDIMC", "NWG", "SA", "SB", "STRM", "STRN", "VWM", "VWN"}, {
- { // AMD GPUs
- kDeviceTypeGPU, "AMD", {
- { "AMD Radeon R9 M370X Compute Engine", { 32, 2, 32, 32, 64, 8, 8, 64, 0, 0, 1, 1, 2, 8 } },
- { "ATI Radeon HD 6750M", { 32, 2, 8, 8, 32, 16, 16, 64, 1, 1, 0, 0, 4, 4 } },
- { "Ellesmere", { 32, 2, 16, 16, 32, 8, 8, 32, 1, 1, 0, 0, 1, 4 } },
- { "Fiji", { 32, 2, 16, 16, 32, 16, 16, 32, 1, 1, 0, 0, 1, 2 } },
- { "Hawaii", { 32, 2, 32, 8, 32, 8, 16, 32, 1, 0, 1, 0, 1, 1 } },
- { "Oland", { 32, 2, 16, 8, 32, 32, 32, 128, 1, 0, 0, 1, 2, 4 } },
- { "Pitcairn", { 16, 2, 8, 8, 32, 8, 8, 32, 0, 1, 1, 1, 4, 2 } },
- { "Tahiti", { 16, 2, 8, 8, 32, 8, 16, 32, 1, 0, 0, 1, 2, 1 } },
- { "Tonga", { 16, 2, 32, 8, 64, 16, 32, 64, 1, 1, 1, 0, 2, 1 } },
- { "Turks", { 16, 2, 8, 8, 32, 32, 8, 32, 0, 1, 0, 0, 2, 1 } },
- { "default", { 32, 2, 16, 16, 32, 16, 16, 32, 1, 1, 0, 0, 1, 2 } },
- }
- },
- { // ARM GPUs
- kDeviceTypeGPU, "ARM", {
- { "Mali-T628", { 16, 2, 8, 16, 128, 16, 8, 128, 0, 0, 0, 1, 8, 1 } },
- { "default", { 16, 2, 8, 16, 128, 16, 8, 128, 0, 0, 0, 1, 8, 1 } },
- }
- },
- { // Intel CPUs
- kDeviceTypeCPU, "Intel", {
- { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 16, 2, 32, 8, 128, 16, 16, 128, 1, 1, 0, 1, 1, 2 } },
- { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 2, 32, 16, 32, 16, 16, 64, 0, 1, 1, 0, 1, 2 } },
- { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 2, 16, 16, 64, 8, 16, 64, 0, 1, 0, 0, 4, 4 } },
- { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 2, 8, 8, 128, 16, 32, 128, 0, 0, 0, 0, 1, 4 } },
- { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 2, 8, 8, 128, 32, 8, 128, 0, 0, 0, 0, 1, 4 } },
- { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 2, 8, 16, 16, 16, 16, 128, 0, 0, 1, 1, 1, 4 } },
- { "default", { 32, 2, 16, 16, 64, 8, 8, 32, 0, 0, 0, 0, 4, 2 } },
- }
- },
- { // Intel GPUs
- kDeviceTypeGPU, "Intel", {
- { "Intel(R) HD Graphics 530", { 16, 8, 8, 16, 64, 32, 8, 32, 0, 0, 0, 0, 2, 1 } },
- { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 16, 8, 8, 8, 32, 16, 16, 64, 1, 0, 0, 0, 4, 4 } },
- { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 32, 8, 16, 16, 64, 16, 16, 64, 1, 1, 1, 1, 2, 1 } },
- { "Intel(R) HD Graphics IvyBridge M GT2", { 32, 2, 16, 16, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } },
- { "Intel(R) HD Graphics Skylake ULT GT2", { 32, 2, 16, 16, 64, 16, 16, 64, 0, 0, 0, 0, 4, 2 } },
- { "Iris", { 32, 8, 32, 16, 64, 8, 16, 64, 1, 0, 1, 0, 1, 1 } },
- { "Iris Pro", { 16, 2, 8, 8, 32, 32, 8, 32, 1, 1, 1, 1, 1, 1 } },
- { "default", { 32, 2, 8, 8, 32, 8, 8, 32, 1, 1, 0, 0, 4, 2 } },
- }
- },
- { // Intel accelerators
- kDeviceTypeAccelerator, "Intel", {
- { "Intel(R) Many Integrated Core Acceleration Card", { 32, 2, 32, 32, 32, 32, 16, 128, 1, 0, 0, 0, 1, 4 } },
- { "default", { 32, 2, 32, 32, 32, 32, 16, 128, 1, 0, 0, 0, 1, 4 } },
- }
- },
- { // NVIDIA GPUs
- kDeviceTypeGPU, "NVIDIA", {
- { "GRID K520", { 16, 8, 32, 32, 64, 32, 16, 128, 1, 0, 1, 0, 1, 4 } },
- { "GeForce GTX 1070", { 16, 2, 16, 16, 128, 16, 16, 64, 1, 1, 1, 1, 2, 4 } },
- { "GeForce GTX 1080", { 16, 2, 32, 16, 64, 32, 8, 64, 1, 1, 0, 0, 1, 2 } },
- { "GeForce GTX 480", { 16, 2, 16, 16, 32, 32, 16, 128, 0, 1, 1, 1, 2, 2 } },
- { "GeForce GTX 670", { 16, 2, 32, 32, 64, 32, 8, 32, 1, 1, 1, 1, 1, 1 } },
- { "GeForce GTX 680", { 16, 2, 32, 16, 64, 32, 32, 128, 1, 0, 0, 0, 2, 2 } },
- { "GeForce GTX 750", { 16, 8, 16, 16, 64, 16, 16, 64, 1, 1, 1, 0, 2, 2 } },
- { "GeForce GTX 750 Ti", { 16, 2, 16, 8, 32, 32, 16, 64, 1, 1, 1, 0, 1, 2 } },
- { "GeForce GTX 980", { 32, 8, 32, 32, 64, 16, 16, 64, 1, 1, 1, 0, 2, 1 } },
- { "GeForce GTX TITAN", { 16, 8, 16, 16, 64, 32, 16, 64, 1, 1, 1, 0, 1, 1 } },
- { "GeForce GTX TITAN Black", { 16, 2, 8, 16, 64, 8, 8, 32, 0, 1, 1, 0, 1, 2 } },
- { "GeForce GTX TITAN X", { 16, 2, 8, 8, 64, 8, 8, 32, 1, 0, 1, 1, 1, 4 } },
- { "TITAN X (Pascal)", { 32, 2, 32, 32, 64, 8, 8, 32, 1, 1, 0, 0, 2, 4 } },
- { "Tesla K20m", { 32, 2, 8, 16, 64, 8, 16, 64, 1, 0, 0, 0, 1, 4 } },
- { "Tesla K40m", { 16, 2, 32, 32, 32, 32, 8, 64, 0, 1, 0, 0, 1, 1 } },
- { "default", { 32, 2, 8, 8, 16, 32, 32, 64, 1, 1, 0, 0, 1, 1 } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { 32, 2, 16, 16, 32, 8, 8, 32, 1, 1, 0, 0, 2, 1 } },
- }
- },
- }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry XgemmDouble = {
- "Xgemm", Precision::kDouble, {"KWG", "KWI", "MDIMA", "MDIMC", "MWG", "NDIMB", "NDIMC", "NWG", "SA", "SB", "STRM", "STRN", "VWM", "VWN"}, {
- { // AMD GPUs
- kDeviceTypeGPU, "AMD", {
- { "AMD Radeon R9 M370X Compute Engine", { 32, 2, 16, 16, 64, 8, 8, 32, 0, 0, 0, 0, 4, 4 } },
- { "Ellesmere", { 32, 2, 16, 16, 32, 16, 16, 64, 1, 1, 0, 0, 2, 2 } },
- { "Fiji", { 32, 2, 16, 16, 32, 16, 16, 32, 1, 1, 0, 0, 2, 2 } },
- { "Hawaii", { 16, 8, 32, 8, 128, 8, 8, 32, 0, 1, 0, 0, 1, 4 } },
- { "Oland", { 16, 2, 8, 16, 64, 16, 8, 16, 0, 0, 1, 1, 1, 1 } },
- { "Pitcairn", { 32, 2, 32, 16, 64, 8, 16, 32, 0, 0, 0, 0, 1, 2 } },
- { "Tahiti", { 32, 2, 16, 8, 16, 8, 8, 32, 0, 0, 0, 1, 1, 4 } },
- { "Tonga", { 32, 2, 16, 16, 32, 16, 16, 32, 1, 1, 0, 0, 2, 2 } },
- { "default", { 32, 2, 16, 16, 32, 16, 16, 32, 1, 1, 0, 0, 2, 2 } },
- }
- },
- { // ARM GPUs
- kDeviceTypeGPU, "ARM", {
- { "Mali-T628", { 32, 2, 8, 8, 64, 8, 8, 16, 0, 1, 1, 0, 8, 2 } },
- { "default", { 32, 2, 8, 8, 64, 8, 8, 16, 0, 1, 1, 0, 8, 2 } },
- }
- },
- { // Intel CPUs
- kDeviceTypeCPU, "Intel", {
- { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 16, 2, 32, 8, 128, 16, 16, 128, 1, 1, 1, 1, 2, 8 } },
- { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 2, 32, 16, 128, 16, 16, 64, 0, 1, 1, 0, 1, 2 } },
- { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 2, 32, 16, 128, 16, 16, 128, 0, 0, 1, 0, 1, 2 } },
- { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 2, 16, 8, 128, 8, 8, 64, 1, 0, 0, 1, 2, 8 } },
- { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 2, 16, 8, 128, 8, 8, 128, 1, 0, 0, 0, 2, 8 } },
- { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 2, 8, 16, 128, 16, 8, 128, 0, 0, 1, 1, 1, 8 } },
- { "default", { 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 1, 4 } },
- }
- },
- { // Intel accelerators
- kDeviceTypeAccelerator, "Intel", {
- { "Intel(R) Many Integrated Core Acceleration Card", { 32, 8, 8, 16, 16, 16, 16, 128, 0, 0, 1, 0, 1, 4 } },
- { "default", { 32, 8, 8, 16, 16, 16, 16, 128, 0, 0, 1, 0, 1, 4 } },
- }
- },
- { // NVIDIA GPUs
- kDeviceTypeGPU, "NVIDIA", {
- { "GRID K520", { 16, 2, 8, 8, 16, 8, 8, 32, 1, 0, 0, 1, 2, 2 } },
- { "GeForce GTX 1070", { 16, 2, 8, 16, 32, 8, 8, 64, 0, 0, 1, 1, 2, 8 } },
- { "GeForce GTX 1080", { 32, 2, 16, 16, 32, 16, 16, 64, 0, 0, 0, 0, 2, 4 } },
- { "GeForce GTX 480", { 16, 2, 8, 16, 32, 32, 8, 64, 1, 1, 1, 0, 1, 2 } },
- { "GeForce GTX 670", { 32, 8, 16, 32, 128, 16, 8, 32, 0, 1, 1, 0, 1, 1 } },
- { "GeForce GTX 680", { 32, 8, 8, 8, 32, 16, 32, 128, 1, 0, 0, 1, 2, 4 } },
- { "GeForce GTX 750", { 32, 8, 16, 32, 64, 16, 8, 128, 0, 0, 0, 1, 2, 1 } },
- { "GeForce GTX 750 Ti", { 32, 2, 8, 8, 32, 16, 16, 32, 0, 0, 0, 0, 4, 2 } },
- { "GeForce GTX 980", { 32, 8, 16, 8, 64, 32, 32, 128, 0, 0, 1, 0, 2, 4 } },
- { "GeForce GTX TITAN", { 16, 8, 16, 8, 32, 16, 32, 128, 1, 1, 1, 1, 2, 2 } },
- { "GeForce GTX TITAN Black", { 16, 2, 16, 8, 16, 16, 8, 16, 1, 1, 1, 0, 1, 1 } },
- { "GeForce GTX TITAN X", { 16, 8, 16, 16, 16, 16, 16, 16, 0, 0, 0, 0, 1, 1 } },
- { "TITAN X (Pascal)", { 32, 2, 32, 32, 32, 16, 16, 32, 0, 0, 0, 0, 1, 2 } },
- { "Tesla K20m", { 16, 2, 32, 8, 32, 16, 16, 64, 1, 0, 0, 0, 1, 1 } },
- { "Tesla K40m", { 32, 2, 16, 8, 64, 16, 32, 128, 1, 0, 1, 1, 2, 4 } },
- { "default", { 32, 2, 16, 16, 32, 16, 16, 64, 0, 0, 0, 0, 2, 4 } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { 32, 2, 8, 8, 32, 8, 8, 64, 0, 0, 0, 0, 4, 4 } },
- }
- },
- }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry XgemmComplexDouble = {
- "Xgemm", Precision::kComplexDouble, {"KWG", "KWI", "MDIMA", "MDIMC", "MWG", "NDIMB", "NDIMC", "NWG", "SA", "SB", "STRM", "STRN", "VWM", "VWN"}, {
- { // AMD GPUs
- kDeviceTypeGPU, "AMD", {
- { "AMD Radeon R9 M370X Compute Engine", { 32, 8, 8, 16, 32, 16, 16, 32, 0, 0, 1, 1, 2, 2 } },
- { "Ellesmere", { 32, 2, 16, 16, 16, 16, 16, 16, 1, 1, 0, 0, 1, 1 } },
- { "Fiji", { 32, 2, 16, 16, 16, 16, 16, 16, 1, 1, 0, 0, 1, 1 } },
- { "Hawaii", { 16, 2, 16, 16, 16, 16, 16, 32, 1, 0, 0, 0, 1, 2 } },
- { "Oland", { 16, 2, 16, 8, 16, 16, 32, 128, 0, 0, 0, 0, 1, 4 } },
- { "Pitcairn", { 32, 2, 16, 8, 32, 8, 32, 32, 0, 1, 1, 0, 1, 1 } },
- { "Tahiti", { 16, 2, 16, 8, 16, 8, 8, 16, 0, 0, 1, 0, 1, 1 } },
- { "Tonga", { 16, 2, 32, 16, 32, 16, 16, 16, 1, 1, 1, 1, 1, 1 } },
- { "default", { 32, 2, 16, 16, 16, 16, 16, 16, 1, 1, 0, 0, 1, 1 } },
- }
- },
- { // ARM GPUs
- kDeviceTypeGPU, "ARM", {
- { "Mali-T628", { 16, 2, 8, 8, 64, 32, 8, 64, 0, 0, 1, 0, 8, 1 } },
- { "default", { 16, 2, 8, 8, 64, 32, 8, 64, 0, 0, 1, 0, 8, 1 } },
- }
- },
- { // Intel CPUs
- kDeviceTypeCPU, "Intel", {
- { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 16, 2, 32, 8, 64, 16, 8, 128, 0, 1, 0, 1, 2, 1 } },
- { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 2, 16, 32, 128, 16, 16, 64, 0, 1, 0, 0, 2, 4 } },
- { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 2, 16, 32, 128, 16, 8, 32, 0, 1, 0, 0, 4, 1 } },
- { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 2, 8, 8, 128, 8, 16, 128, 0, 0, 0, 1, 1, 8 } },
- { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 2, 8, 8, 128, 32, 8, 128, 0, 0, 0, 0, 1, 4 } },
- { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 8, 8, 32, 32, 8, 8, 32, 0, 1, 0, 0, 1, 2 } },
- { "default", { 32, 2, 8, 8, 16, 8, 8, 32, 1, 1, 0, 0, 1, 2 } },
- }
- },
- { // Intel accelerators
- kDeviceTypeAccelerator, "Intel", {
- { "Intel(R) Many Integrated Core Acceleration Card", { 32, 2, 16, 16, 16, 16, 8, 32, 0, 0, 1, 0, 1, 1 } },
- { "default", { 32, 2, 16, 16, 16, 16, 8, 32, 0, 0, 1, 0, 1, 1 } },
- }
- },
- { // NVIDIA GPUs
- kDeviceTypeGPU, "NVIDIA", {
- { "GRID K520", { 32, 8, 16, 16, 16, 8, 16, 64, 1, 0, 1, 1, 1, 1 } },
- { "GeForce GTX 1070", { 32, 8, 32, 16, 32, 8, 8, 32, 0, 0, 0, 1, 1, 4 } },
- { "GeForce GTX 1080", { 32, 2, 16, 16, 16, 8, 8, 16, 0, 0, 0, 0, 1, 2 } },
- { "GeForce GTX 480", { 16, 2, 32, 32, 32, 32, 8, 32, 0, 0, 1, 0, 1, 1 } },
- { "GeForce GTX 670", { 32, 8, 16, 8, 16, 16, 32, 64, 1, 0, 0, 1, 1, 2 } },
- { "GeForce GTX 680", { 16, 8, 16, 8, 64, 16, 32, 32, 0, 1, 1, 0, 1, 1 } },
- { "GeForce GTX 750", { 32, 2, 8, 32, 32, 8, 8, 64, 0, 0, 1, 0, 1, 4 } },
- { "GeForce GTX 750 Ti", { 32, 2, 8, 8, 16, 8, 8, 32, 0, 0, 0, 0, 1, 1 } },
- { "GeForce GTX 980", { 16, 2, 16, 8, 32, 8, 16, 128, 0, 0, 1, 1, 2, 2 } },
- { "GeForce GTX TITAN Black", { 16, 2, 16, 16, 32, 16, 8, 32, 0, 1, 1, 1, 1, 1 } },
- { "GeForce GTX TITAN X", { 32, 8, 16, 16, 128, 16, 16, 32, 0, 0, 1, 0, 1, 1 } },
- { "TITAN X (Pascal)", { 32, 2, 16, 16, 16, 16, 16, 16, 0, 0, 0, 0, 1, 1 } },
- { "Tesla K20m", { 32, 2, 32, 8, 32, 16, 16, 64, 0, 0, 1, 0, 1, 1 } },
- { "Tesla K40m", { 16, 8, 8, 8, 32, 32, 16, 32, 0, 0, 1, 0, 1, 1 } },
- { "default", { 32, 2, 16, 16, 32, 16, 16, 32, 0, 0, 0, 0, 1, 1 } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { 32, 2, 32, 32, 32, 8, 8, 32, 1, 1, 0, 0, 1, 1 } },
- }
- },
- }
-};
-
-// =================================================================================================
-} // namespace database
-} // namespace clblast
diff --git a/src/database/kernels/xgemm/xgemm.hpp b/src/database/kernels/xgemm/xgemm.hpp
new file mode 100644
index 00000000..d5d382f0
--- /dev/null
+++ b/src/database/kernels/xgemm/xgemm.hpp
@@ -0,0 +1,14 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Xgemm' kernels.
+//
+// =================================================================================================
+
+#include "database/kernels/xgemm/xgemm_16.hpp"
+#include "database/kernels/xgemm/xgemm_32.hpp"
+#include "database/kernels/xgemm/xgemm_3232.hpp"
+#include "database/kernels/xgemm/xgemm_64.hpp"
+#include "database/kernels/xgemm/xgemm_6464.hpp"
diff --git a/src/database/kernels/xgemm/xgemm_16.hpp b/src/database/kernels/xgemm/xgemm_16.hpp
new file mode 100644
index 00000000..53a8ccd9
--- /dev/null
+++ b/src/database/kernels/xgemm/xgemm_16.hpp
@@ -0,0 +1,36 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Xgemm16' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry XgemmHalf = {
+ "Xgemm", Precision::kHalf, {"KWG", "KWI", "MDIMA", "MDIMC", "MWG", "NDIMB", "NDIMC", "NWG", "SA", "SB", "STRM", "STRN", "VWM", "VWN"}, {
+ { // AMD GPUs
+ kDeviceTypeGPU, "AMD", {
+ { "Ellesmere", { 32, 2, 8, 8, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } },
+ { "default", { 32, 2, 8, 8, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } },
+ }
+ },
+ { // Intel GPUs
+ kDeviceTypeGPU, "Intel", {
+ { "Intel(R) HD Graphics Skylake ULT GT2", { 32, 2, 8, 8, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } },
+ { "default", { 32, 2, 8, 8, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } },
+ }
+ },
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { 32, 2, 8, 8, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/database/kernels/xgemm/xgemm_32.hpp b/src/database/kernels/xgemm/xgemm_32.hpp
new file mode 100644
index 00000000..2c0a63f2
--- /dev/null
+++ b/src/database/kernels/xgemm/xgemm_32.hpp
@@ -0,0 +1,101 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Xgemm32' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry XgemmSingle = {
+ "Xgemm", Precision::kSingle, {"KWG", "KWI", "MDIMA", "MDIMC", "MWG", "NDIMB", "NDIMC", "NWG", "SA", "SB", "STRM", "STRN", "VWM", "VWN"}, {
+ { // AMD GPUs
+ kDeviceTypeGPU, "AMD", {
+ { "AMD Radeon R9 M370X Compute Engine", { 32, 2, 16, 16, 64, 8, 16, 128, 0, 0, 0, 0, 2, 8 } },
+ { "ATI Radeon HD 6750M", { 32, 2, 8, 16, 128, 8, 8, 128, 0, 0, 1, 1, 8, 8 } },
+ { "Ellesmere", { 32, 2, 8, 8, 16, 16, 16, 64, 1, 1, 0, 0, 1, 2 } },
+ { "Fiji", { 32, 2, 16, 16, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } },
+ { "Hawaii", { 16, 2, 16, 32, 128, 32, 8, 64, 1, 1, 1, 1, 4, 2 } },
+ { "Oland", { 16, 2, 32, 16, 64, 32, 16, 128, 1, 1, 1, 0, 2, 4 } },
+ { "Pitcairn", { 16, 2, 16, 8, 32, 16, 16, 128, 0, 0, 1, 0, 1, 1 } },
+ { "Tahiti", { 32, 2, 16, 32, 128, 16, 8, 64, 0, 0, 0, 0, 4, 1 } },
+ { "Tonga", { 16, 2, 16, 32, 64, 16, 8, 128, 1, 1, 0, 0, 2, 8 } },
+ { "Turks", { 32, 2, 8, 8, 64, 8, 8, 64, 0, 0, 0, 0, 4, 4 } },
+ { "default", { 32, 2, 8, 8, 32, 8, 8, 64, 0, 0, 0, 0, 4, 4 } },
+ }
+ },
+ { // ARM GPUs
+ kDeviceTypeGPU, "ARM", {
+ { "Mali-T628", { 16, 2, 8, 8, 64, 8, 16, 16, 0, 0, 1, 1, 8, 1 } },
+ { "default", { 16, 2, 8, 8, 64, 8, 16, 16, 0, 0, 1, 1, 8, 1 } },
+ }
+ },
+ { // Intel CPUs
+ kDeviceTypeCPU, "Intel", {
+ { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 16, 2, 8, 8, 128, 16, 8, 128, 0, 1, 1, 1, 1, 8 } },
+ { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 8, 32, 32, 64, 32, 16, 64, 1, 1, 1, 0, 2, 2 } },
+ { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 2, 16, 8, 128, 16, 8, 64, 0, 0, 1, 0, 1, 2 } },
+ { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 2, 32, 8, 128, 8, 8, 128, 1, 1, 1, 1, 2, 8 } },
+ { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 16, 2, 8, 8, 128, 8, 8, 128, 1, 1, 1, 0, 1, 8 } },
+ { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 8, 16, 16, 64, 32, 32, 64, 0, 1, 1, 0, 1, 2 } },
+ { "default", { 32, 2, 8, 8, 32, 8, 8, 64, 1, 1, 0, 0, 4, 4 } },
+ }
+ },
+ { // Intel GPUs
+ kDeviceTypeGPU, "Intel", {
+ { "Intel(R) HD Graphics 530", { 32, 2, 8, 8, 128, 32, 16, 64, 0, 0, 1, 0, 4, 2 } },
+ { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 32, 8, 8, 8, 64, 32, 16, 64, 1, 1, 1, 1, 4, 2 } },
+ { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 16, 2, 16, 8, 32, 8, 16, 128, 1, 1, 1, 1, 2, 4 } },
+ { "Intel(R) HD Graphics IvyBridge M GT2", { 32, 2, 16, 16, 64, 16, 8, 64, 1, 1, 1, 0, 2, 4 } },
+ { "Intel(R) HD Graphics Skylake ULT GT2", { 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 4, 4 } },
+ { "Iris", { 16, 8, 16, 8, 128, 32, 16, 64, 1, 1, 1, 1, 4, 1 } },
+ { "Iris Pro", { 16, 2, 16, 8, 64, 32, 32, 128, 1, 1, 1, 0, 4, 4 } },
+ { "default", { 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 4, 4 } },
+ }
+ },
+ { // Intel accelerators
+ kDeviceTypeAccelerator, "Intel", {
+ { "Intel(R) Many Integrated Core Acceleration Card", { 32, 2, 32, 32, 32, 32, 8, 128, 0, 0, 1, 0, 1, 4 } },
+ { "default", { 32, 2, 32, 32, 32, 32, 8, 128, 0, 0, 1, 0, 1, 4 } },
+ }
+ },
+ { // NVIDIA GPUs
+ kDeviceTypeGPU, "NVIDIA", {
+ { "GRID K520", { 16, 2, 16, 8, 32, 8, 16, 64, 1, 1, 1, 1, 2, 4 } },
+ { "GeForce GT 650M", { 32, 2, 8, 8, 32, 32, 32, 64, 1, 1, 0, 0, 4, 2 } },
+ { "GeForce GTX 1070", { 16, 2, 32, 16, 128, 32, 8, 128, 1, 1, 1, 0, 4, 1 } },
+ { "GeForce GTX 1080", { 32, 2, 16, 8, 64, 8, 8, 64, 1, 1, 1, 1, 4, 8 } },
+ { "GeForce GTX 480", { 16, 2, 16, 8, 64, 32, 16, 64, 1, 1, 1, 1, 2, 2 } },
+ { "GeForce GTX 670", { 16, 2, 8, 8, 64, 16, 16, 64, 1, 1, 1, 0, 2, 4 } },
+ { "GeForce GTX 680", { 32, 8, 8, 16, 64, 32, 16, 128, 1, 1, 0, 0, 4, 2 } },
+ { "GeForce GTX 750", { 16, 2, 16, 16, 64, 32, 8, 128, 1, 1, 1, 1, 1, 2 } },
+ { "GeForce GTX 750 Ti", { 16, 2, 16, 16, 128, 32, 8, 64, 1, 1, 0, 1, 8, 2 } },
+ { "GeForce GTX 980", { 16, 2, 16, 16, 64, 16, 8, 128, 1, 1, 1, 0, 4, 8 } },
+ { "GeForce GTX TITAN", { 16, 8, 32, 16, 64, 8, 8, 64, 1, 1, 1, 0, 2, 2 } },
+ { "GeForce GTX TITAN Black", { 16, 2, 16, 8, 64, 16, 16, 64, 1, 1, 1, 0, 4, 1 } },
+ { "GeForce GTX TITAN X", { 16, 2, 8, 16, 128, 8, 8, 128, 1, 1, 1, 1, 4, 8 } },
+ { "TITAN X (Pascal)", { 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 4, 1 } },
+ { "Tesla K20m", { 16, 2, 32, 16, 64, 16, 8, 64, 1, 1, 1, 0, 2, 4 } },
+ { "Tesla K40m", { 16, 8, 16, 8, 64, 16, 16, 128, 1, 1, 1, 0, 2, 4 } },
+ { "default", { 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 4, 2 } },
+ }
+ },
+ { // QUALCOMM GPUs
+ kDeviceTypeGPU, "QUALCOMM", {
+ { "QUALCOMM Adreno(TM)", { 32, 2, 8, 8, 32, 8, 8, 32, 1, 1, 0, 0, 4, 1 } },
+ { "default", { 32, 2, 8, 8, 32, 8, 8, 32, 1, 1, 0, 0, 4, 1 } },
+ }
+ },
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { 32, 2, 8, 8, 32, 8, 8, 32, 1, 1, 0, 0, 4, 2 } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/database/kernels/xgemm/xgemm_3232.hpp b/src/database/kernels/xgemm/xgemm_3232.hpp
new file mode 100644
index 00000000..3ba015e2
--- /dev/null
+++ b/src/database/kernels/xgemm/xgemm_3232.hpp
@@ -0,0 +1,94 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Xgemm3232' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry XgemmComplexSingle = {
+ "Xgemm", Precision::kComplexSingle, {"KWG", "KWI", "MDIMA", "MDIMC", "MWG", "NDIMB", "NDIMC", "NWG", "SA", "SB", "STRM", "STRN", "VWM", "VWN"}, {
+ { // AMD GPUs
+ kDeviceTypeGPU, "AMD", {
+ { "AMD Radeon R9 M370X Compute Engine", { 32, 2, 32, 32, 64, 8, 8, 64, 0, 0, 1, 1, 2, 8 } },
+ { "ATI Radeon HD 6750M", { 32, 2, 8, 8, 32, 16, 16, 64, 1, 1, 0, 0, 4, 4 } },
+ { "Ellesmere", { 32, 2, 16, 16, 32, 8, 8, 32, 1, 1, 0, 0, 1, 4 } },
+ { "Fiji", { 32, 2, 16, 16, 32, 16, 16, 32, 1, 1, 0, 0, 1, 2 } },
+ { "Hawaii", { 32, 2, 32, 8, 32, 8, 16, 32, 1, 0, 1, 0, 1, 1 } },
+ { "Oland", { 32, 2, 16, 8, 32, 32, 32, 128, 1, 0, 0, 1, 2, 4 } },
+ { "Pitcairn", { 16, 2, 8, 8, 32, 8, 8, 32, 0, 1, 1, 1, 4, 2 } },
+ { "Tahiti", { 16, 2, 8, 8, 32, 8, 16, 32, 1, 0, 0, 1, 2, 1 } },
+ { "Tonga", { 16, 2, 32, 8, 64, 16, 32, 64, 1, 1, 1, 0, 2, 1 } },
+ { "Turks", { 16, 2, 8, 8, 32, 32, 8, 32, 0, 1, 0, 0, 2, 1 } },
+ { "default", { 32, 2, 16, 16, 32, 16, 16, 32, 1, 1, 0, 0, 1, 2 } },
+ }
+ },
+ { // ARM GPUs
+ kDeviceTypeGPU, "ARM", {
+ { "Mali-T628", { 16, 2, 8, 16, 128, 16, 8, 128, 0, 0, 0, 1, 8, 1 } },
+ { "default", { 16, 2, 8, 16, 128, 16, 8, 128, 0, 0, 0, 1, 8, 1 } },
+ }
+ },
+ { // Intel CPUs
+ kDeviceTypeCPU, "Intel", {
+ { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 16, 2, 32, 8, 128, 16, 16, 128, 1, 1, 0, 1, 1, 2 } },
+ { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 2, 32, 16, 32, 16, 16, 64, 0, 1, 1, 0, 1, 2 } },
+ { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 2, 16, 16, 64, 8, 16, 64, 0, 1, 0, 0, 4, 4 } },
+ { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 2, 8, 8, 128, 16, 32, 128, 0, 0, 0, 0, 1, 4 } },
+ { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 2, 8, 8, 128, 32, 8, 128, 0, 0, 0, 0, 1, 4 } },
+ { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 2, 8, 16, 16, 16, 16, 128, 0, 0, 1, 1, 1, 4 } },
+ { "default", { 32, 2, 16, 16, 64, 8, 8, 32, 0, 0, 0, 0, 4, 2 } },
+ }
+ },
+ { // Intel GPUs
+ kDeviceTypeGPU, "Intel", {
+ { "Intel(R) HD Graphics 530", { 16, 8, 8, 16, 64, 32, 8, 32, 0, 0, 0, 0, 2, 1 } },
+ { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 16, 8, 8, 8, 32, 16, 16, 64, 1, 0, 0, 0, 4, 4 } },
+ { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 32, 8, 16, 16, 64, 16, 16, 64, 1, 1, 1, 1, 2, 1 } },
+ { "Intel(R) HD Graphics IvyBridge M GT2", { 32, 2, 16, 16, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } },
+ { "Intel(R) HD Graphics Skylake ULT GT2", { 32, 2, 16, 16, 64, 16, 16, 64, 0, 0, 0, 0, 4, 2 } },
+ { "Iris", { 32, 8, 32, 16, 64, 8, 16, 64, 1, 0, 1, 0, 1, 1 } },
+ { "Iris Pro", { 16, 2, 8, 8, 32, 32, 8, 32, 1, 1, 1, 1, 1, 1 } },
+ { "default", { 32, 2, 8, 8, 32, 8, 8, 32, 1, 1, 0, 0, 4, 2 } },
+ }
+ },
+ { // Intel accelerators
+ kDeviceTypeAccelerator, "Intel", {
+ { "Intel(R) Many Integrated Core Acceleration Card", { 32, 2, 32, 32, 32, 32, 16, 128, 1, 0, 0, 0, 1, 4 } },
+ { "default", { 32, 2, 32, 32, 32, 32, 16, 128, 1, 0, 0, 0, 1, 4 } },
+ }
+ },
+ { // NVIDIA GPUs
+ kDeviceTypeGPU, "NVIDIA", {
+ { "GRID K520", { 16, 8, 32, 32, 64, 32, 16, 128, 1, 0, 1, 0, 1, 4 } },
+ { "GeForce GTX 1070", { 16, 2, 16, 16, 128, 16, 16, 64, 1, 1, 1, 1, 2, 4 } },
+ { "GeForce GTX 1080", { 16, 2, 32, 16, 64, 32, 8, 64, 1, 1, 0, 0, 1, 2 } },
+ { "GeForce GTX 480", { 16, 2, 16, 16, 32, 32, 16, 128, 0, 1, 1, 1, 2, 2 } },
+ { "GeForce GTX 670", { 16, 2, 32, 32, 64, 32, 8, 32, 1, 1, 1, 1, 1, 1 } },
+ { "GeForce GTX 680", { 16, 2, 32, 16, 64, 32, 32, 128, 1, 0, 0, 0, 2, 2 } },
+ { "GeForce GTX 750", { 16, 8, 16, 16, 64, 16, 16, 64, 1, 1, 1, 0, 2, 2 } },
+ { "GeForce GTX 750 Ti", { 16, 2, 16, 8, 32, 32, 16, 64, 1, 1, 1, 0, 1, 2 } },
+ { "GeForce GTX 980", { 32, 8, 32, 32, 64, 16, 16, 64, 1, 1, 1, 0, 2, 1 } },
+ { "GeForce GTX TITAN", { 16, 8, 16, 16, 64, 32, 16, 64, 1, 1, 1, 0, 1, 1 } },
+ { "GeForce GTX TITAN Black", { 16, 2, 8, 16, 64, 8, 8, 32, 0, 1, 1, 0, 1, 2 } },
+ { "GeForce GTX TITAN X", { 16, 2, 8, 8, 64, 8, 8, 32, 1, 0, 1, 1, 1, 4 } },
+ { "TITAN X (Pascal)", { 32, 2, 32, 32, 64, 8, 8, 32, 1, 1, 0, 0, 2, 4 } },
+ { "Tesla K20m", { 32, 2, 8, 16, 64, 8, 16, 64, 1, 0, 0, 0, 1, 4 } },
+ { "Tesla K40m", { 16, 2, 32, 32, 32, 32, 8, 64, 0, 1, 0, 0, 1, 1 } },
+ { "default", { 32, 2, 8, 8, 16, 32, 32, 64, 1, 1, 0, 0, 1, 1 } },
+ }
+ },
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { 32, 2, 16, 16, 32, 8, 8, 32, 1, 1, 0, 0, 2, 1 } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/database/kernels/xgemm/xgemm_64.hpp b/src/database/kernels/xgemm/xgemm_64.hpp
new file mode 100644
index 00000000..45d04e80
--- /dev/null
+++ b/src/database/kernels/xgemm/xgemm_64.hpp
@@ -0,0 +1,80 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Xgemm64' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry XgemmDouble = {
+ "Xgemm", Precision::kDouble, {"KWG", "KWI", "MDIMA", "MDIMC", "MWG", "NDIMB", "NDIMC", "NWG", "SA", "SB", "STRM", "STRN", "VWM", "VWN"}, {
+ { // AMD GPUs
+ kDeviceTypeGPU, "AMD", {
+ { "AMD Radeon R9 M370X Compute Engine", { 32, 2, 16, 16, 64, 8, 8, 32, 0, 0, 0, 0, 4, 4 } },
+ { "Ellesmere", { 32, 2, 16, 16, 32, 16, 16, 64, 1, 1, 0, 0, 2, 2 } },
+ { "Fiji", { 32, 2, 16, 16, 32, 16, 16, 32, 1, 1, 0, 0, 2, 2 } },
+ { "Hawaii", { 16, 8, 32, 8, 128, 8, 8, 32, 0, 1, 0, 0, 1, 4 } },
+ { "Oland", { 16, 2, 8, 16, 64, 16, 8, 16, 0, 0, 1, 1, 1, 1 } },
+ { "Pitcairn", { 32, 2, 32, 16, 64, 8, 16, 32, 0, 0, 0, 0, 1, 2 } },
+ { "Tahiti", { 32, 2, 16, 8, 16, 8, 8, 32, 0, 0, 0, 1, 1, 4 } },
+ { "Tonga", { 32, 2, 16, 16, 32, 16, 16, 32, 1, 1, 0, 0, 2, 2 } },
+ { "default", { 32, 2, 16, 16, 32, 16, 16, 32, 1, 1, 0, 0, 2, 2 } },
+ }
+ },
+ { // ARM GPUs
+ kDeviceTypeGPU, "ARM", {
+ { "Mali-T628", { 32, 2, 8, 8, 64, 8, 8, 16, 0, 1, 1, 0, 8, 2 } },
+ { "default", { 32, 2, 8, 8, 64, 8, 8, 16, 0, 1, 1, 0, 8, 2 } },
+ }
+ },
+ { // Intel CPUs
+ kDeviceTypeCPU, "Intel", {
+ { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 16, 2, 32, 8, 128, 16, 16, 128, 1, 1, 1, 1, 2, 8 } },
+ { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 2, 32, 16, 128, 16, 16, 64, 0, 1, 1, 0, 1, 2 } },
+ { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 2, 32, 16, 128, 16, 16, 128, 0, 0, 1, 0, 1, 2 } },
+ { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 2, 16, 8, 128, 8, 8, 64, 1, 0, 0, 1, 2, 8 } },
+ { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 2, 16, 8, 128, 8, 8, 128, 1, 0, 0, 0, 2, 8 } },
+ { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 2, 8, 16, 128, 16, 8, 128, 0, 0, 1, 1, 1, 8 } },
+ { "default", { 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 1, 4 } },
+ }
+ },
+ { // Intel accelerators
+ kDeviceTypeAccelerator, "Intel", {
+ { "Intel(R) Many Integrated Core Acceleration Card", { 32, 8, 8, 16, 16, 16, 16, 128, 0, 0, 1, 0, 1, 4 } },
+ { "default", { 32, 8, 8, 16, 16, 16, 16, 128, 0, 0, 1, 0, 1, 4 } },
+ }
+ },
+ { // NVIDIA GPUs
+ kDeviceTypeGPU, "NVIDIA", {
+ { "GRID K520", { 16, 2, 8, 8, 16, 8, 8, 32, 1, 0, 0, 1, 2, 2 } },
+ { "GeForce GTX 1070", { 16, 2, 8, 16, 32, 8, 8, 64, 0, 0, 1, 1, 2, 8 } },
+ { "GeForce GTX 1080", { 32, 2, 16, 16, 32, 16, 16, 64, 0, 0, 0, 0, 2, 4 } },
+ { "GeForce GTX 480", { 16, 2, 8, 16, 32, 32, 8, 64, 1, 1, 1, 0, 1, 2 } },
+ { "GeForce GTX 670", { 32, 8, 16, 32, 128, 16, 8, 32, 0, 1, 1, 0, 1, 1 } },
+ { "GeForce GTX 680", { 32, 8, 8, 8, 32, 16, 32, 128, 1, 0, 0, 1, 2, 4 } },
+ { "GeForce GTX 750", { 32, 8, 16, 32, 64, 16, 8, 128, 0, 0, 0, 1, 2, 1 } },
+ { "GeForce GTX 750 Ti", { 32, 2, 8, 8, 32, 16, 16, 32, 0, 0, 0, 0, 4, 2 } },
+ { "GeForce GTX 980", { 32, 8, 16, 8, 64, 32, 32, 128, 0, 0, 1, 0, 2, 4 } },
+ { "GeForce GTX TITAN", { 16, 8, 16, 8, 32, 16, 32, 128, 1, 1, 1, 1, 2, 2 } },
+ { "GeForce GTX TITAN Black", { 16, 2, 16, 8, 16, 16, 8, 16, 1, 1, 1, 0, 1, 1 } },
+ { "GeForce GTX TITAN X", { 16, 8, 16, 16, 16, 16, 16, 16, 0, 0, 0, 0, 1, 1 } },
+ { "TITAN X (Pascal)", { 32, 2, 32, 32, 32, 16, 16, 32, 0, 0, 0, 0, 1, 2 } },
+ { "Tesla K20m", { 16, 2, 32, 8, 32, 16, 16, 64, 1, 0, 0, 0, 1, 1 } },
+ { "Tesla K40m", { 32, 2, 16, 8, 64, 16, 32, 128, 1, 0, 1, 1, 2, 4 } },
+ { "default", { 32, 2, 16, 16, 32, 16, 16, 64, 0, 0, 0, 0, 2, 4 } },
+ }
+ },
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { 32, 2, 8, 8, 32, 8, 8, 64, 0, 0, 0, 0, 4, 4 } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/database/kernels/xgemm/xgemm_6464.hpp b/src/database/kernels/xgemm/xgemm_6464.hpp
new file mode 100644
index 00000000..14f47eff
--- /dev/null
+++ b/src/database/kernels/xgemm/xgemm_6464.hpp
@@ -0,0 +1,79 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Xgemm6464' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry XgemmComplexDouble = {
+ "Xgemm", Precision::kComplexDouble, {"KWG", "KWI", "MDIMA", "MDIMC", "MWG", "NDIMB", "NDIMC", "NWG", "SA", "SB", "STRM", "STRN", "VWM", "VWN"}, {
+ { // AMD GPUs
+ kDeviceTypeGPU, "AMD", {
+ { "AMD Radeon R9 M370X Compute Engine", { 32, 8, 8, 16, 32, 16, 16, 32, 0, 0, 1, 1, 2, 2 } },
+ { "Ellesmere", { 32, 2, 16, 16, 16, 16, 16, 16, 1, 1, 0, 0, 1, 1 } },
+ { "Fiji", { 32, 2, 16, 16, 16, 16, 16, 16, 1, 1, 0, 0, 1, 1 } },
+ { "Hawaii", { 16, 2, 16, 16, 16, 16, 16, 32, 1, 0, 0, 0, 1, 2 } },
+ { "Oland", { 16, 2, 16, 8, 16, 16, 32, 128, 0, 0, 0, 0, 1, 4 } },
+ { "Pitcairn", { 32, 2, 16, 8, 32, 8, 32, 32, 0, 1, 1, 0, 1, 1 } },
+ { "Tahiti", { 16, 2, 16, 8, 16, 8, 8, 16, 0, 0, 1, 0, 1, 1 } },
+ { "Tonga", { 16, 2, 32, 16, 32, 16, 16, 16, 1, 1, 1, 1, 1, 1 } },
+ { "default", { 32, 2, 16, 16, 16, 16, 16, 16, 1, 1, 0, 0, 1, 1 } },
+ }
+ },
+ { // ARM GPUs
+ kDeviceTypeGPU, "ARM", {
+ { "Mali-T628", { 16, 2, 8, 8, 64, 32, 8, 64, 0, 0, 1, 0, 8, 1 } },
+ { "default", { 16, 2, 8, 8, 64, 32, 8, 64, 0, 0, 1, 0, 8, 1 } },
+ }
+ },
+ { // Intel CPUs
+ kDeviceTypeCPU, "Intel", {
+ { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 16, 2, 32, 8, 64, 16, 8, 128, 0, 1, 0, 1, 2, 1 } },
+ { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 2, 16, 32, 128, 16, 16, 64, 0, 1, 0, 0, 2, 4 } },
+ { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 2, 16, 32, 128, 16, 8, 32, 0, 1, 0, 0, 4, 1 } },
+ { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 2, 8, 8, 128, 8, 16, 128, 0, 0, 0, 1, 1, 8 } },
+ { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 2, 8, 8, 128, 32, 8, 128, 0, 0, 0, 0, 1, 4 } },
+ { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 8, 8, 32, 32, 8, 8, 32, 0, 1, 0, 0, 1, 2 } },
+ { "default", { 32, 2, 8, 8, 16, 8, 8, 32, 1, 1, 0, 0, 1, 2 } },
+ }
+ },
+ { // Intel accelerators
+ kDeviceTypeAccelerator, "Intel", {
+ { "Intel(R) Many Integrated Core Acceleration Card", { 32, 2, 16, 16, 16, 16, 8, 32, 0, 0, 1, 0, 1, 1 } },
+ { "default", { 32, 2, 16, 16, 16, 16, 8, 32, 0, 0, 1, 0, 1, 1 } },
+ }
+ },
+ { // NVIDIA GPUs
+ kDeviceTypeGPU, "NVIDIA", {
+ { "GRID K520", { 32, 8, 16, 16, 16, 8, 16, 64, 1, 0, 1, 1, 1, 1 } },
+ { "GeForce GTX 1070", { 32, 8, 32, 16, 32, 8, 8, 32, 0, 0, 0, 1, 1, 4 } },
+ { "GeForce GTX 1080", { 32, 2, 16, 16, 16, 8, 8, 16, 0, 0, 0, 0, 1, 2 } },
+ { "GeForce GTX 480", { 16, 2, 32, 32, 32, 32, 8, 32, 0, 0, 1, 0, 1, 1 } },
+ { "GeForce GTX 670", { 32, 8, 16, 8, 16, 16, 32, 64, 1, 0, 0, 1, 1, 2 } },
+ { "GeForce GTX 680", { 16, 8, 16, 8, 64, 16, 32, 32, 0, 1, 1, 0, 1, 1 } },
+ { "GeForce GTX 750", { 32, 2, 8, 32, 32, 8, 8, 64, 0, 0, 1, 0, 1, 4 } },
+ { "GeForce GTX 750 Ti", { 32, 2, 8, 8, 16, 8, 8, 32, 0, 0, 0, 0, 1, 1 } },
+ { "GeForce GTX 980", { 16, 2, 16, 8, 32, 8, 16, 128, 0, 0, 1, 1, 2, 2 } },
+ { "GeForce GTX TITAN Black", { 16, 2, 16, 16, 32, 16, 8, 32, 0, 1, 1, 1, 1, 1 } },
+ { "GeForce GTX TITAN X", { 32, 8, 16, 16, 128, 16, 16, 32, 0, 0, 1, 0, 1, 1 } },
+ { "TITAN X (Pascal)", { 32, 2, 16, 16, 16, 16, 16, 16, 0, 0, 0, 0, 1, 1 } },
+ { "Tesla K20m", { 32, 2, 32, 8, 32, 16, 16, 64, 0, 0, 1, 0, 1, 1 } },
+ { "Tesla K40m", { 16, 8, 8, 8, 32, 32, 16, 32, 0, 0, 1, 0, 1, 1 } },
+ { "default", { 32, 2, 16, 16, 32, 16, 16, 32, 0, 0, 0, 0, 1, 1 } },
+ }
+ },
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { 32, 2, 32, 32, 32, 8, 8, 32, 1, 1, 0, 0, 1, 1 } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/database/kernels/xgemm_direct.hpp b/src/database/kernels/xgemm_direct.hpp
deleted file mode 100644
index acace63f..00000000
--- a/src/database/kernels/xgemm_direct.hpp
+++ /dev/null
@@ -1,218 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-// Database generator <database.py>
-//
-// This file populates the database with best-found tuning parameters for the 'Xgemm_Direct' kernels.
-//
-// =================================================================================================
-
-namespace clblast {
-namespace database {
-// =================================================================================================
-
-const Database::DatabaseEntry XgemmDirectHalf = {
- "XgemmDirect", Precision::kHalf, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, {
- { // AMD GPUs
- kDeviceTypeGPU, "AMD", {
- { "Ellesmere", { 8, 32, 8, 8, 32, 1, 1, 1, 1, 32 } },
- { "default", { 8, 32, 8, 8, 32, 1, 1, 1, 1, 32 } },
- }
- },
- { // Intel GPUs
- kDeviceTypeGPU, "Intel", {
- { "Intel(R) HD Graphics Skylake ULT GT2", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 8 } },
- { "default", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 8 } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } },
- }
- },
- }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry XgemmDirectSingle = {
- "XgemmDirect", Precision::kSingle, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, {
- { // AMD GPUs
- kDeviceTypeGPU, "AMD", {
- { "AMD Radeon R9 M370X Compute Engine", { 2, 8, 8, 8, 8, 1, 1, 2, 2, 32 } },
- { "ATI Radeon HD 6750M", { 8, 8, 16, 8, 8, 1, 0, 2, 2, 32 } },
- { "Ellesmere", { 2, 8, 8, 32, 32, 1, 1, 2, 1, 32 } },
- { "Fiji", { 2, 16, 16, 8, 8, 1, 1, 1, 1, 16 } },
- { "Tonga", { 16, 16, 16, 32, 8, 0, 1, 1, 1, 32 } },
- { "Turks", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 16 } },
- { "default", { 2, 16, 16, 8, 8, 1, 1, 1, 1, 16 } },
- }
- },
- { // Intel CPUs
- kDeviceTypeCPU, "Intel", {
- { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 2, 8, 8, 8, 8, 0, 0, 1, 8, 64 } },
- { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 16, 16, 8, 8, 8, 0, 0, 2, 4, 32 } },
- { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 2, 8, 8, 8, 8, 0, 0, 2, 2, 64 } },
- { "default", { 2, 8, 8, 8, 8, 1, 1, 4, 2, 32 } },
- }
- },
- { // Intel GPUs
- kDeviceTypeGPU, "Intel", {
- { "Intel(R) HD Graphics Skylake ULT GT2", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 8 } },
- { "Iris Pro", { 2, 16, 16, 8, 8, 1, 1, 2, 4, 32 } },
- { "default", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 8 } },
- }
- },
- { // NVIDIA GPUs
- kDeviceTypeGPU, "NVIDIA", {
- { "GeForce GT 650M", { 16, 16, 16, 8, 16, 1, 0, 2, 2, 32 } },
- { "GeForce GTX 1080", { 16, 16, 8, 16, 8, 1, 1, 1, 1, 32 } },
- { "GeForce GTX 750 Ti", { 2, 8, 8, 8, 8, 1, 1, 4, 2, 32 } },
- { "GeForce GTX TITAN Black", { 2, 8, 8, 16, 16, 1, 1, 4, 2, 32 } },
- { "TITAN X (Pascal)", { 8, 32, 8, 8, 16, 1, 1, 1, 1, 32 } },
- { "default", { 2, 8, 8, 16, 16, 1, 1, 4, 2, 32 } },
- }
- },
- { // QUALCOMM GPUs
- kDeviceTypeGPU, "QUALCOMM", {
- { "QUALCOMM Adreno(TM)", { 2, 8, 8, 8, 8, 1, 1, 2, 1, 16 } },
- { "default", { 2, 8, 8, 8, 8, 1, 1, 2, 1, 16 } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { 2, 8, 8, 8, 8, 1, 1, 1, 2, 16 } },
- }
- },
- }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry XgemmDirectComplexSingle = {
- "XgemmDirect", Precision::kComplexSingle, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, {
- { // AMD GPUs
- kDeviceTypeGPU, "AMD", {
- { "AMD Radeon R9 M370X Compute Engine", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } },
- { "ATI Radeon HD 6750M", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 8 } },
- { "Fiji", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } },
- { "Tonga", { 2, 16, 16, 16, 16, 1, 1, 2, 2, 32 } },
- { "Turks", { 2, 8, 8, 8, 8, 1, 1, 2, 2, 16 } },
- { "default", { 2, 16, 16, 16, 16, 1, 1, 2, 2, 32 } },
- }
- },
- { // Intel CPUs
- kDeviceTypeCPU, "Intel", {
- { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 2, 8, 8, 8, 8, 0, 0, 4, 4, 32 } },
- { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 2, 16, 16, 8, 8, 1, 1, 1, 4, 32 } },
- { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 2, 8, 8, 16, 8, 1, 1, 2, 1, 32 } },
- { "default", { 2, 8, 8, 8, 8, 1, 1, 4, 4, 32 } },
- }
- },
- { // Intel GPUs
- kDeviceTypeGPU, "Intel", {
- { "Intel(R) HD Graphics Skylake ULT GT2", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } },
- { "Iris Pro", { 2, 16, 16, 8, 8, 1, 1, 2, 2, 32 } },
- { "default", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } },
- }
- },
- { // NVIDIA GPUs
- kDeviceTypeGPU, "NVIDIA", {
- { "GeForce GTX 1080", { 8, 8, 16, 16, 8, 1, 1, 2, 2, 32 } },
- { "GeForce GTX 750 Ti", { 16, 8, 8, 16, 8, 1, 1, 2, 1, 16 } },
- { "GeForce GTX TITAN Black", { 2, 8, 8, 16, 16, 1, 1, 1, 1, 16 } },
- { "TITAN X (Pascal)", { 2, 16, 16, 8, 8, 1, 1, 1, 1, 16 } },
- { "default", { 2, 8, 8, 8, 8, 1, 1, 1, 2, 16 } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { 2, 32, 32, 8, 8, 1, 1, 1, 1, 32 } },
- }
- },
- }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry XgemmDirectDouble = {
- "XgemmDirect", Precision::kDouble, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, {
- { // AMD GPUs
- kDeviceTypeGPU, "AMD", {
- { "AMD Radeon R9 M370X Compute Engine", { 2, 16, 16, 8, 8, 1, 1, 1, 1, 16 } },
- { "Ellesmere", { 8, 16, 16, 8, 16, 1, 1, 2, 1, 32 } },
- { "Fiji", { 16, 8, 8, 8, 16, 1, 1, 1, 1, 16 } },
- { "Tonga", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 32 } },
- { "default", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } },
- }
- },
- { // Intel CPUs
- kDeviceTypeCPU, "Intel", {
- { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 2, 8, 8, 8, 8, 1, 1, 4, 4, 32 } },
- { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 8, 8, 8, 8, 8, 0, 0, 1, 4, 32 } },
- { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 2, 8, 8, 8, 8, 1, 1, 4, 4, 32 } },
- { "default", { 2, 8, 8, 8, 8, 1, 1, 4, 2, 32 } },
- }
- },
- { // NVIDIA GPUs
- kDeviceTypeGPU, "NVIDIA", {
- { "GeForce GTX 1080", { 2, 16, 16, 8, 8, 1, 1, 1, 2, 16 } },
- { "GeForce GTX 750 Ti", { 2, 8, 8, 8, 8, 1, 1, 2, 4, 32 } },
- { "GeForce GTX TITAN Black", { 8, 16, 16, 16, 8, 1, 0, 1, 1, 16 } },
- { "TITAN X (Pascal)", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 16 } },
- { "default", { 2, 8, 8, 8, 8, 1, 1, 1, 2, 16 } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { 2, 8, 8, 8, 8, 1, 1, 2, 2, 16 } },
- }
- },
- }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry XgemmDirectComplexDouble = {
- "XgemmDirect", Precision::kComplexDouble, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, {
- { // AMD GPUs
- kDeviceTypeGPU, "AMD", {
- { "AMD Radeon R9 M370X Compute Engine", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } },
- { "Ellesmere", { 16, 32, 32, 16, 8, 0, 0, 1, 1, 32 } },
- { "Fiji", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } },
- { "Tonga", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } },
- { "default", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } },
- }
- },
- { // Intel CPUs
- kDeviceTypeCPU, "Intel", {
- { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 2, 8, 8, 32, 8, 0, 0, 1, 1, 32 } },
- { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 8, 16, 16, 8, 8, 0, 0, 2, 1, 32 } },
- { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 8, 16, 8, 8, 8, 0, 0, 2, 2, 32 } },
- { "default", { 2, 8, 8, 8, 8, 1, 1, 2, 2, 16 } },
- }
- },
- { // NVIDIA GPUs
- kDeviceTypeGPU, "NVIDIA", {
- { "GeForce GTX 1080", { 2, 16, 16, 8, 8, 1, 1, 1, 1, 16 } },
- { "GeForce GTX 750 Ti", { 2, 32, 32, 8, 8, 1, 1, 1, 1, 32 } },
- { "GeForce GTX TITAN Black", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 8 } },
- { "TITAN X (Pascal)", { 2, 16, 16, 8, 8, 1, 1, 1, 2, 16 } },
- { "default", { 2, 16, 16, 8, 8, 1, 1, 1, 1, 16 } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { 2, 8, 8, 8, 8, 1, 1, 1, 2, 16 } },
- }
- },
- }
-};
-
-// =================================================================================================
-} // namespace database
-} // namespace clblast
diff --git a/src/database/kernels/xgemm_direct/xgemm_direct.hpp b/src/database/kernels/xgemm_direct/xgemm_direct.hpp
new file mode 100644
index 00000000..9a26e7ce
--- /dev/null
+++ b/src/database/kernels/xgemm_direct/xgemm_direct.hpp
@@ -0,0 +1,14 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Xgemm_Direct' kernels.
+//
+// =================================================================================================
+
+#include "database/kernels/xgemm_direct/xgemm_direct_16.hpp"
+#include "database/kernels/xgemm_direct/xgemm_direct_32.hpp"
+#include "database/kernels/xgemm_direct/xgemm_direct_3232.hpp"
+#include "database/kernels/xgemm_direct/xgemm_direct_64.hpp"
+#include "database/kernels/xgemm_direct/xgemm_direct_6464.hpp"
diff --git a/src/database/kernels/xgemm_direct/xgemm_direct_16.hpp b/src/database/kernels/xgemm_direct/xgemm_direct_16.hpp
new file mode 100644
index 00000000..ed71285f
--- /dev/null
+++ b/src/database/kernels/xgemm_direct/xgemm_direct_16.hpp
@@ -0,0 +1,36 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Xgemm_Direct16' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry XgemmDirectHalf = {
+ "XgemmDirect", Precision::kHalf, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, {
+ { // AMD GPUs
+ kDeviceTypeGPU, "AMD", {
+ { "Ellesmere", { 8, 32, 8, 8, 32, 1, 1, 1, 1, 32 } },
+ { "default", { 8, 32, 8, 8, 32, 1, 1, 1, 1, 32 } },
+ }
+ },
+ { // Intel GPUs
+ kDeviceTypeGPU, "Intel", {
+ { "Intel(R) HD Graphics Skylake ULT GT2", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 8 } },
+ { "default", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 8 } },
+ }
+ },
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/database/kernels/xgemm_direct/xgemm_direct_32.hpp b/src/database/kernels/xgemm_direct/xgemm_direct_32.hpp
new file mode 100644
index 00000000..e0a991c0
--- /dev/null
+++ b/src/database/kernels/xgemm_direct/xgemm_direct_32.hpp
@@ -0,0 +1,66 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Xgemm_Direct32' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry XgemmDirectSingle = {
+ "XgemmDirect", Precision::kSingle, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, {
+ { // AMD GPUs
+ kDeviceTypeGPU, "AMD", {
+ { "AMD Radeon R9 M370X Compute Engine", { 2, 8, 8, 8, 8, 1, 1, 2, 2, 32 } },
+ { "ATI Radeon HD 6750M", { 8, 8, 16, 8, 8, 1, 0, 2, 2, 32 } },
+ { "Ellesmere", { 2, 8, 8, 32, 32, 1, 1, 2, 1, 32 } },
+ { "Fiji", { 2, 16, 16, 8, 8, 1, 1, 1, 1, 16 } },
+ { "Tonga", { 16, 16, 16, 32, 8, 0, 1, 1, 1, 32 } },
+ { "Turks", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 16 } },
+ { "default", { 2, 16, 16, 8, 8, 1, 1, 1, 1, 16 } },
+ }
+ },
+ { // Intel CPUs
+ kDeviceTypeCPU, "Intel", {
+ { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 2, 8, 8, 8, 8, 0, 0, 1, 8, 64 } },
+ { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 16, 16, 8, 8, 8, 0, 0, 2, 4, 32 } },
+ { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 2, 8, 8, 8, 8, 0, 0, 2, 2, 64 } },
+ { "default", { 2, 8, 8, 8, 8, 1, 1, 4, 2, 32 } },
+ }
+ },
+ { // Intel GPUs
+ kDeviceTypeGPU, "Intel", {
+ { "Intel(R) HD Graphics Skylake ULT GT2", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 8 } },
+ { "Iris Pro", { 2, 16, 16, 8, 8, 1, 1, 2, 4, 32 } },
+ { "default", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 8 } },
+ }
+ },
+ { // NVIDIA GPUs
+ kDeviceTypeGPU, "NVIDIA", {
+ { "GeForce GT 650M", { 16, 16, 16, 8, 16, 1, 0, 2, 2, 32 } },
+ { "GeForce GTX 1080", { 16, 16, 8, 16, 8, 1, 1, 1, 1, 32 } },
+ { "GeForce GTX 750 Ti", { 2, 8, 8, 8, 8, 1, 1, 4, 2, 32 } },
+ { "GeForce GTX TITAN Black", { 2, 8, 8, 16, 16, 1, 1, 4, 2, 32 } },
+ { "TITAN X (Pascal)", { 8, 32, 8, 8, 16, 1, 1, 1, 1, 32 } },
+ { "default", { 2, 8, 8, 16, 16, 1, 1, 4, 2, 32 } },
+ }
+ },
+ { // QUALCOMM GPUs
+ kDeviceTypeGPU, "QUALCOMM", {
+ { "QUALCOMM Adreno(TM)", { 2, 8, 8, 8, 8, 1, 1, 2, 1, 16 } },
+ { "default", { 2, 8, 8, 8, 8, 1, 1, 2, 1, 16 } },
+ }
+ },
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { 2, 8, 8, 8, 8, 1, 1, 1, 2, 16 } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/database/kernels/xgemm_direct/xgemm_direct_3232.hpp b/src/database/kernels/xgemm_direct/xgemm_direct_3232.hpp
new file mode 100644
index 00000000..05e672ac
--- /dev/null
+++ b/src/database/kernels/xgemm_direct/xgemm_direct_3232.hpp
@@ -0,0 +1,58 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Xgemm_Direct3232' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry XgemmDirectComplexSingle = {
+ "XgemmDirect", Precision::kComplexSingle, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, {
+ { // AMD GPUs
+ kDeviceTypeGPU, "AMD", {
+ { "AMD Radeon R9 M370X Compute Engine", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } },
+ { "ATI Radeon HD 6750M", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 8 } },
+ { "Fiji", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } },
+ { "Tonga", { 2, 16, 16, 16, 16, 1, 1, 2, 2, 32 } },
+ { "Turks", { 2, 8, 8, 8, 8, 1, 1, 2, 2, 16 } },
+ { "default", { 2, 16, 16, 16, 16, 1, 1, 2, 2, 32 } },
+ }
+ },
+ { // Intel CPUs
+ kDeviceTypeCPU, "Intel", {
+ { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 2, 8, 8, 8, 8, 0, 0, 4, 4, 32 } },
+ { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 2, 16, 16, 8, 8, 1, 1, 1, 4, 32 } },
+ { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 2, 8, 8, 16, 8, 1, 1, 2, 1, 32 } },
+ { "default", { 2, 8, 8, 8, 8, 1, 1, 4, 4, 32 } },
+ }
+ },
+ { // Intel GPUs
+ kDeviceTypeGPU, "Intel", {
+ { "Intel(R) HD Graphics Skylake ULT GT2", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } },
+ { "Iris Pro", { 2, 16, 16, 8, 8, 1, 1, 2, 2, 32 } },
+ { "default", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } },
+ }
+ },
+ { // NVIDIA GPUs
+ kDeviceTypeGPU, "NVIDIA", {
+ { "GeForce GTX 1080", { 8, 8, 16, 16, 8, 1, 1, 2, 2, 32 } },
+ { "GeForce GTX 750 Ti", { 16, 8, 8, 16, 8, 1, 1, 2, 1, 16 } },
+ { "GeForce GTX TITAN Black", { 2, 8, 8, 16, 16, 1, 1, 1, 1, 16 } },
+ { "TITAN X (Pascal)", { 2, 16, 16, 8, 8, 1, 1, 1, 1, 16 } },
+ { "default", { 2, 8, 8, 8, 8, 1, 1, 1, 2, 16 } },
+ }
+ },
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { 2, 32, 32, 8, 8, 1, 1, 1, 1, 32 } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/database/kernels/xgemm_direct/xgemm_direct_64.hpp b/src/database/kernels/xgemm_direct/xgemm_direct_64.hpp
new file mode 100644
index 00000000..ac740dae
--- /dev/null
+++ b/src/database/kernels/xgemm_direct/xgemm_direct_64.hpp
@@ -0,0 +1,50 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Xgemm_Direct64' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry XgemmDirectDouble = {
+ "XgemmDirect", Precision::kDouble, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, {
+ { // AMD GPUs
+ kDeviceTypeGPU, "AMD", {
+ { "AMD Radeon R9 M370X Compute Engine", { 2, 16, 16, 8, 8, 1, 1, 1, 1, 16 } },
+ { "Ellesmere", { 8, 16, 16, 8, 16, 1, 1, 2, 1, 32 } },
+ { "Fiji", { 16, 8, 8, 8, 16, 1, 1, 1, 1, 16 } },
+ { "Tonga", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 32 } },
+ { "default", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } },
+ }
+ },
+ { // Intel CPUs
+ kDeviceTypeCPU, "Intel", {
+ { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 2, 8, 8, 8, 8, 1, 1, 4, 4, 32 } },
+ { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 8, 8, 8, 8, 8, 0, 0, 1, 4, 32 } },
+ { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 2, 8, 8, 8, 8, 1, 1, 4, 4, 32 } },
+ { "default", { 2, 8, 8, 8, 8, 1, 1, 4, 2, 32 } },
+ }
+ },
+ { // NVIDIA GPUs
+ kDeviceTypeGPU, "NVIDIA", {
+ { "GeForce GTX 1080", { 2, 16, 16, 8, 8, 1, 1, 1, 2, 16 } },
+ { "GeForce GTX 750 Ti", { 2, 8, 8, 8, 8, 1, 1, 2, 4, 32 } },
+ { "GeForce GTX TITAN Black", { 8, 16, 16, 16, 8, 1, 0, 1, 1, 16 } },
+ { "TITAN X (Pascal)", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 16 } },
+ { "default", { 2, 8, 8, 8, 8, 1, 1, 1, 2, 16 } },
+ }
+ },
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { 2, 8, 8, 8, 8, 1, 1, 2, 2, 16 } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/database/kernels/xgemm_direct/xgemm_direct_6464.hpp b/src/database/kernels/xgemm_direct/xgemm_direct_6464.hpp
new file mode 100644
index 00000000..1352ec66
--- /dev/null
+++ b/src/database/kernels/xgemm_direct/xgemm_direct_6464.hpp
@@ -0,0 +1,50 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Xgemm_Direct6464' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry XgemmDirectComplexDouble = {
+ "XgemmDirect", Precision::kComplexDouble, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, {
+ { // AMD GPUs
+ kDeviceTypeGPU, "AMD", {
+ { "AMD Radeon R9 M370X Compute Engine", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } },
+ { "Ellesmere", { 16, 32, 32, 16, 8, 0, 0, 1, 1, 32 } },
+ { "Fiji", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } },
+ { "Tonga", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } },
+ { "default", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } },
+ }
+ },
+ { // Intel CPUs
+ kDeviceTypeCPU, "Intel", {
+ { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 2, 8, 8, 32, 8, 0, 0, 1, 1, 32 } },
+ { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 8, 16, 16, 8, 8, 0, 0, 2, 1, 32 } },
+ { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 8, 16, 8, 8, 8, 0, 0, 2, 2, 32 } },
+ { "default", { 2, 8, 8, 8, 8, 1, 1, 2, 2, 16 } },
+ }
+ },
+ { // NVIDIA GPUs
+ kDeviceTypeGPU, "NVIDIA", {
+ { "GeForce GTX 1080", { 2, 16, 16, 8, 8, 1, 1, 1, 1, 16 } },
+ { "GeForce GTX 750 Ti", { 2, 32, 32, 8, 8, 1, 1, 1, 1, 32 } },
+ { "GeForce GTX TITAN Black", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 8 } },
+ { "TITAN X (Pascal)", { 2, 16, 16, 8, 8, 1, 1, 1, 2, 16 } },
+ { "default", { 2, 16, 16, 8, 8, 1, 1, 1, 1, 16 } },
+ }
+ },
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { 2, 8, 8, 8, 8, 1, 1, 1, 2, 16 } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/database/kernels/xgemv.hpp b/src/database/kernels/xgemv.hpp
deleted file mode 100644
index c537294a..00000000
--- a/src/database/kernels/xgemv.hpp
+++ /dev/null
@@ -1,306 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-// Database generator <database.py>
-//
-// This file populates the database with best-found tuning parameters for the 'Xgemv' kernels.
-//
-// =================================================================================================
-
-namespace clblast {
-namespace database {
-// =================================================================================================
-
-const Database::DatabaseEntry XgemvHalf = {
- "Xgemv", Precision::kHalf, {"WGS1", "WPT1"}, {
- { // AMD GPUs
- kDeviceTypeGPU, "AMD", {
- { "Ellesmere", { 256, 1 } },
- { "default", { 256, 1 } },
- }
- },
- { // Intel GPUs
- kDeviceTypeGPU, "Intel", {
- { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 64, 1 } },
- { "Intel(R) HD Graphics Skylake ULT GT2", { 256, 1 } },
- { "default", { 64, 1 } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { 64, 1 } },
- }
- },
- }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry XgemvSingle = {
- "Xgemv", Precision::kSingle, {"WGS1", "WPT1"}, {
- { // AMD GPUs
- kDeviceTypeGPU, "AMD", {
- { "AMD Radeon R9 M370X Compute Engine", { 128, 1 } },
- { "ATI Radeon HD 6750M", { 32, 1 } },
- { "Ellesmere", { 256, 1 } },
- { "Fiji", { 128, 1 } },
- { "Hawaii", { 128, 1 } },
- { "Oland", { 128, 1 } },
- { "Pitcairn", { 256, 1 } },
- { "Tahiti", { 256, 1 } },
- { "Tonga", { 128, 2 } },
- { "Turks", { 32, 1 } },
- { "default", { 128, 1 } },
- }
- },
- { // Intel CPUs
- kDeviceTypeCPU, "Intel", {
- { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 128, 4 } },
- { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 64, 1 } },
- { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 4 } },
- { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 64, 4 } },
- { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 64, 4 } },
- { "default", { 64, 4 } },
- }
- },
- { // Intel GPUs
- kDeviceTypeGPU, "Intel", {
- { "Intel(R) HD Graphics 530", { 256, 1 } },
- { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 64, 1 } },
- { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 64, 1 } },
- { "Intel(R) HD Graphics IvyBridge M GT2", { 256, 1 } },
- { "Intel(R) HD Graphics Skylake ULT GT2", { 32, 1 } },
- { "Iris", { 64, 2 } },
- { "Iris Pro", { 128, 1 } },
- { "default", { 128, 1 } },
- }
- },
- { // Intel accelerators
- kDeviceTypeAccelerator, "Intel", {
- { "Intel(R) Many Integrated Core Acceleration Card", { 64, 1 } },
- { "default", { 64, 1 } },
- }
- },
- { // NVIDIA GPUs
- kDeviceTypeGPU, "NVIDIA", {
- { "GRID K520", { 256, 1 } },
- { "GeForce GT 650M", { 256, 1 } },
- { "GeForce GTX 1070", { 128, 1 } },
- { "GeForce GTX 1080", { 32, 1 } },
- { "GeForce GTX 480", { 64, 1 } },
- { "GeForce GTX 670", { 64, 1 } },
- { "GeForce GTX 680", { 256, 1 } },
- { "GeForce GTX 750", { 256, 1 } },
- { "GeForce GTX 750 Ti", { 32, 1 } },
- { "GeForce GTX 980", { 128, 1 } },
- { "GeForce GTX TITAN", { 256, 1 } },
- { "GeForce GTX TITAN Black", { 256, 1 } },
- { "GeForce GTX TITAN X", { 256, 1 } },
- { "TITAN X (Pascal)", { 32, 1 } },
- { "Tesla K20m", { 128, 1 } },
- { "Tesla K40m", { 256, 1 } },
- { "default", { 256, 1 } },
- }
- },
- { // QUALCOMM GPUs
- kDeviceTypeGPU, "QUALCOMM", {
- { "QUALCOMM Adreno(TM)", { 64, 1 } },
- { "default", { 64, 1 } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { 128, 1 } },
- }
- },
- }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry XgemvComplexSingle = {
- "Xgemv", Precision::kComplexSingle, {"WGS1", "WPT1"}, {
- { // AMD GPUs
- kDeviceTypeGPU, "AMD", {
- { "AMD Radeon R9 M370X Compute Engine", { 64, 1 } },
- { "ATI Radeon HD 6750M", { 64, 1 } },
- { "Ellesmere", { 32, 1 } },
- { "Fiji", { 32, 1 } },
- { "Hawaii", { 64, 1 } },
- { "Oland", { 64, 1 } },
- { "Pitcairn", { 64, 1 } },
- { "Tahiti", { 64, 1 } },
- { "Tonga", { 32, 1 } },
- { "Turks", { 64, 1 } },
- { "default", { 64, 1 } },
- }
- },
- { // Intel CPUs
- kDeviceTypeCPU, "Intel", {
- { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 4 } },
- { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 128, 1 } },
- { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 128, 2 } },
- { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 64, 4 } },
- { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 64, 4 } },
- { "default", { 64, 2 } },
- }
- },
- { // Intel GPUs
- kDeviceTypeGPU, "Intel", {
- { "Intel(R) HD Graphics 530", { 64, 1 } },
- { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 64, 1 } },
- { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 128, 1 } },
- { "Intel(R) HD Graphics IvyBridge M GT2", { 256, 1 } },
- { "Intel(R) HD Graphics Skylake ULT GT2", { 64, 1 } },
- { "Iris", { 256, 1 } },
- { "Iris Pro", { 64, 1 } },
- { "default", { 64, 1 } },
- }
- },
- { // Intel accelerators
- kDeviceTypeAccelerator, "Intel", {
- { "Intel(R) Many Integrated Core Acceleration Card", { 64, 1 } },
- { "default", { 64, 1 } },
- }
- },
- { // NVIDIA GPUs
- kDeviceTypeGPU, "NVIDIA", {
- { "GRID K520", { 256, 1 } },
- { "GeForce GTX 1070", { 64, 1 } },
- { "GeForce GTX 1080", { 32, 1 } },
- { "GeForce GTX 480", { 64, 1 } },
- { "GeForce GTX 670", { 64, 1 } },
- { "GeForce GTX 680", { 64, 1 } },
- { "GeForce GTX 750", { 128, 1 } },
- { "GeForce GTX 750 Ti", { 32, 1 } },
- { "GeForce GTX TITAN", { 256, 1 } },
- { "GeForce GTX TITAN Black", { 32, 1 } },
- { "TITAN X (Pascal)", { 32, 1 } },
- { "default", { 64, 1 } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { 64, 1 } },
- }
- },
- }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry XgemvDouble = {
- "Xgemv", Precision::kDouble, {"WGS1", "WPT1"}, {
- { // AMD GPUs
- kDeviceTypeGPU, "AMD", {
- { "AMD Radeon R9 M370X Compute Engine", { 64, 1 } },
- { "Ellesmere", { 32, 1 } },
- { "Fiji", { 32, 1 } },
- { "Hawaii", { 128, 1 } },
- { "Oland", { 256, 1 } },
- { "Pitcairn", { 256, 1 } },
- { "Tahiti", { 256, 1 } },
- { "Tonga", { 32, 1 } },
- { "default", { 256, 1 } },
- }
- },
- { // Intel CPUs
- kDeviceTypeCPU, "Intel", {
- { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 64, 4 } },
- { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 64, 2 } },
- { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 128, 1 } },
- { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 64, 4 } },
- { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 64, 4 } },
- { "default", { 64, 4 } },
- }
- },
- { // Intel accelerators
- kDeviceTypeAccelerator, "Intel", {
- { "Intel(R) Many Integrated Core Acceleration Card", { 64, 1 } },
- { "default", { 64, 1 } },
- }
- },
- { // NVIDIA GPUs
- kDeviceTypeGPU, "NVIDIA", {
- { "GRID K520", { 128, 1 } },
- { "GeForce GTX 1070", { 64, 1 } },
- { "GeForce GTX 1080", { 32, 1 } },
- { "GeForce GTX 480", { 256, 1 } },
- { "GeForce GTX 670", { 128, 1 } },
- { "GeForce GTX 680", { 128, 1 } },
- { "GeForce GTX 750", { 64, 1 } },
- { "GeForce GTX 750 Ti", { 32, 1 } },
- { "GeForce GTX 980", { 64, 1 } },
- { "GeForce GTX TITAN", { 256, 1 } },
- { "GeForce GTX TITAN Black", { 32, 1 } },
- { "GeForce GTX TITAN X", { 64, 1 } },
- { "TITAN X (Pascal)", { 32, 1 } },
- { "Tesla K20m", { 256, 1 } },
- { "Tesla K40m", { 256, 1 } },
- { "default", { 128, 1 } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { 128, 1 } },
- }
- },
- }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry XgemvComplexDouble = {
- "Xgemv", Precision::kComplexDouble, {"WGS1", "WPT1"}, {
- { // AMD GPUs
- kDeviceTypeGPU, "AMD", {
- { "AMD Radeon R9 M370X Compute Engine", { 64, 1 } },
- { "Ellesmere", { 32, 1 } },
- { "Fiji", { 64, 1 } },
- { "Hawaii", { 64, 1 } },
- { "Oland", { 256, 1 } },
- { "Pitcairn", { 256, 1 } },
- { "Tahiti", { 256, 1 } },
- { "Tonga", { 64, 1 } },
- { "default", { 64, 1 } },
- }
- },
- { // Intel CPUs
- kDeviceTypeCPU, "Intel", {
- { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 64, 4 } },
- { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 64, 1 } },
- { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 128, 2 } },
- { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 64, 4 } },
- { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 4 } },
- { "default", { 64, 4 } },
- }
- },
- { // Intel accelerators
- kDeviceTypeAccelerator, "Intel", {
- { "Intel(R) Many Integrated Core Acceleration Card", { 64, 1 } },
- { "default", { 64, 1 } },
- }
- },
- { // NVIDIA GPUs
- kDeviceTypeGPU, "NVIDIA", {
- { "GRID K520", { 128, 1 } },
- { "GeForce GTX 480", { 64, 1 } },
- { "GeForce GTX 670", { 128, 1 } },
- { "default", { 128, 1 } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { 64, 1 } },
- }
- },
- }
-};
-
-// =================================================================================================
-} // namespace database
-} // namespace clblast
diff --git a/src/database/kernels/xgemv/xgemv.hpp b/src/database/kernels/xgemv/xgemv.hpp
new file mode 100644
index 00000000..081c995f
--- /dev/null
+++ b/src/database/kernels/xgemv/xgemv.hpp
@@ -0,0 +1,14 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Xgemv' kernels.
+//
+// =================================================================================================
+
+#include "database/kernels/xgemv/xgemv_16.hpp"
+#include "database/kernels/xgemv/xgemv_32.hpp"
+#include "database/kernels/xgemv/xgemv_3232.hpp"
+#include "database/kernels/xgemv/xgemv_64.hpp"
+#include "database/kernels/xgemv/xgemv_6464.hpp"
diff --git a/src/database/kernels/xgemv/xgemv_16.hpp b/src/database/kernels/xgemv/xgemv_16.hpp
new file mode 100644
index 00000000..1fc86276
--- /dev/null
+++ b/src/database/kernels/xgemv/xgemv_16.hpp
@@ -0,0 +1,37 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Xgemv16' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry XgemvHalf = {
+ "Xgemv", Precision::kHalf, {"WGS1", "WPT1"}, {
+ { // AMD GPUs
+ kDeviceTypeGPU, "AMD", {
+ { "Ellesmere", { 256, 1 } },
+ { "default", { 256, 1 } },
+ }
+ },
+ { // Intel GPUs
+ kDeviceTypeGPU, "Intel", {
+ { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 64, 1 } },
+ { "Intel(R) HD Graphics Skylake ULT GT2", { 256, 1 } },
+ { "default", { 64, 1 } },
+ }
+ },
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { 64, 1 } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/database/kernels/xgemv/xgemv_32.hpp b/src/database/kernels/xgemv/xgemv_32.hpp
new file mode 100644
index 00000000..fd1aa7aa
--- /dev/null
+++ b/src/database/kernels/xgemv/xgemv_32.hpp
@@ -0,0 +1,94 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Xgemv32' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry XgemvSingle = {
+ "Xgemv", Precision::kSingle, {"WGS1", "WPT1"}, {
+ { // AMD GPUs
+ kDeviceTypeGPU, "AMD", {
+ { "AMD Radeon R9 M370X Compute Engine", { 128, 1 } },
+ { "ATI Radeon HD 6750M", { 32, 1 } },
+ { "Ellesmere", { 256, 1 } },
+ { "Fiji", { 128, 1 } },
+ { "Hawaii", { 128, 1 } },
+ { "Oland", { 128, 1 } },
+ { "Pitcairn", { 256, 1 } },
+ { "Tahiti", { 256, 1 } },
+ { "Tonga", { 128, 2 } },
+ { "Turks", { 32, 1 } },
+ { "default", { 128, 1 } },
+ }
+ },
+ { // Intel CPUs
+ kDeviceTypeCPU, "Intel", {
+ { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 128, 4 } },
+ { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 64, 1 } },
+ { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 4 } },
+ { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 64, 4 } },
+ { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 64, 4 } },
+ { "default", { 64, 4 } },
+ }
+ },
+ { // Intel GPUs
+ kDeviceTypeGPU, "Intel", {
+ { "Intel(R) HD Graphics 530", { 256, 1 } },
+ { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 64, 1 } },
+ { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 64, 1 } },
+ { "Intel(R) HD Graphics IvyBridge M GT2", { 256, 1 } },
+ { "Intel(R) HD Graphics Skylake ULT GT2", { 32, 1 } },
+ { "Iris", { 64, 2 } },
+ { "Iris Pro", { 128, 1 } },
+ { "default", { 128, 1 } },
+ }
+ },
+ { // Intel accelerators
+ kDeviceTypeAccelerator, "Intel", {
+ { "Intel(R) Many Integrated Core Acceleration Card", { 64, 1 } },
+ { "default", { 64, 1 } },
+ }
+ },
+ { // NVIDIA GPUs
+ kDeviceTypeGPU, "NVIDIA", {
+ { "GRID K520", { 256, 1 } },
+ { "GeForce GT 650M", { 256, 1 } },
+ { "GeForce GTX 1070", { 128, 1 } },
+ { "GeForce GTX 1080", { 32, 1 } },
+ { "GeForce GTX 480", { 64, 1 } },
+ { "GeForce GTX 670", { 64, 1 } },
+ { "GeForce GTX 680", { 256, 1 } },
+ { "GeForce GTX 750", { 256, 1 } },
+ { "GeForce GTX 750 Ti", { 32, 1 } },
+ { "GeForce GTX 980", { 128, 1 } },
+ { "GeForce GTX TITAN", { 256, 1 } },
+ { "GeForce GTX TITAN Black", { 256, 1 } },
+ { "GeForce GTX TITAN X", { 256, 1 } },
+ { "TITAN X (Pascal)", { 32, 1 } },
+ { "Tesla K20m", { 128, 1 } },
+ { "Tesla K40m", { 256, 1 } },
+ { "default", { 256, 1 } },
+ }
+ },
+ { // QUALCOMM GPUs
+ kDeviceTypeGPU, "QUALCOMM", {
+ { "QUALCOMM Adreno(TM)", { 64, 1 } },
+ { "default", { 64, 1 } },
+ }
+ },
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { 128, 1 } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/database/kernels/xgemv/xgemv_3232.hpp b/src/database/kernels/xgemv/xgemv_3232.hpp
new file mode 100644
index 00000000..442dd97f
--- /dev/null
+++ b/src/database/kernels/xgemv/xgemv_3232.hpp
@@ -0,0 +1,83 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Xgemv3232' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry XgemvComplexSingle = {
+ "Xgemv", Precision::kComplexSingle, {"WGS1", "WPT1"}, {
+ { // AMD GPUs
+ kDeviceTypeGPU, "AMD", {
+ { "AMD Radeon R9 M370X Compute Engine", { 64, 1 } },
+ { "ATI Radeon HD 6750M", { 64, 1 } },
+ { "Ellesmere", { 32, 1 } },
+ { "Fiji", { 32, 1 } },
+ { "Hawaii", { 64, 1 } },
+ { "Oland", { 64, 1 } },
+ { "Pitcairn", { 64, 1 } },
+ { "Tahiti", { 64, 1 } },
+ { "Tonga", { 32, 1 } },
+ { "Turks", { 64, 1 } },
+ { "default", { 64, 1 } },
+ }
+ },
+ { // Intel CPUs
+ kDeviceTypeCPU, "Intel", {
+ { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 4 } },
+ { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 128, 1 } },
+ { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 128, 2 } },
+ { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 64, 4 } },
+ { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 64, 4 } },
+ { "default", { 64, 2 } },
+ }
+ },
+ { // Intel GPUs
+ kDeviceTypeGPU, "Intel", {
+ { "Intel(R) HD Graphics 530", { 64, 1 } },
+ { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 64, 1 } },
+ { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 128, 1 } },
+ { "Intel(R) HD Graphics IvyBridge M GT2", { 256, 1 } },
+ { "Intel(R) HD Graphics Skylake ULT GT2", { 64, 1 } },
+ { "Iris", { 256, 1 } },
+ { "Iris Pro", { 64, 1 } },
+ { "default", { 64, 1 } },
+ }
+ },
+ { // Intel accelerators
+ kDeviceTypeAccelerator, "Intel", {
+ { "Intel(R) Many Integrated Core Acceleration Card", { 64, 1 } },
+ { "default", { 64, 1 } },
+ }
+ },
+ { // NVIDIA GPUs
+ kDeviceTypeGPU, "NVIDIA", {
+ { "GRID K520", { 256, 1 } },
+ { "GeForce GTX 1070", { 64, 1 } },
+ { "GeForce GTX 1080", { 32, 1 } },
+ { "GeForce GTX 480", { 64, 1 } },
+ { "GeForce GTX 670", { 64, 1 } },
+ { "GeForce GTX 680", { 64, 1 } },
+ { "GeForce GTX 750", { 128, 1 } },
+ { "GeForce GTX 750 Ti", { 32, 1 } },
+ { "GeForce GTX TITAN", { 256, 1 } },
+ { "GeForce GTX TITAN Black", { 32, 1 } },
+ { "TITAN X (Pascal)", { 32, 1 } },
+ { "default", { 64, 1 } },
+ }
+ },
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { 64, 1 } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/database/kernels/xgemv/xgemv_64.hpp b/src/database/kernels/xgemv/xgemv_64.hpp
new file mode 100644
index 00000000..8dd899c3
--- /dev/null
+++ b/src/database/kernels/xgemv/xgemv_64.hpp
@@ -0,0 +1,73 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Xgemv64' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry XgemvDouble = {
+ "Xgemv", Precision::kDouble, {"WGS1", "WPT1"}, {
+ { // AMD GPUs
+ kDeviceTypeGPU, "AMD", {
+ { "AMD Radeon R9 M370X Compute Engine", { 64, 1 } },
+ { "Ellesmere", { 32, 1 } },
+ { "Fiji", { 32, 1 } },
+ { "Hawaii", { 128, 1 } },
+ { "Oland", { 256, 1 } },
+ { "Pitcairn", { 256, 1 } },
+ { "Tahiti", { 256, 1 } },
+ { "Tonga", { 32, 1 } },
+ { "default", { 256, 1 } },
+ }
+ },
+ { // Intel CPUs
+ kDeviceTypeCPU, "Intel", {
+ { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 64, 4 } },
+ { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 64, 2 } },
+ { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 128, 1 } },
+ { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 64, 4 } },
+ { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 64, 4 } },
+ { "default", { 64, 4 } },
+ }
+ },
+ { // Intel accelerators
+ kDeviceTypeAccelerator, "Intel", {
+ { "Intel(R) Many Integrated Core Acceleration Card", { 64, 1 } },
+ { "default", { 64, 1 } },
+ }
+ },
+ { // NVIDIA GPUs
+ kDeviceTypeGPU, "NVIDIA", {
+ { "GRID K520", { 128, 1 } },
+ { "GeForce GTX 1070", { 64, 1 } },
+ { "GeForce GTX 1080", { 32, 1 } },
+ { "GeForce GTX 480", { 256, 1 } },
+ { "GeForce GTX 670", { 128, 1 } },
+ { "GeForce GTX 680", { 128, 1 } },
+ { "GeForce GTX 750", { 64, 1 } },
+ { "GeForce GTX 750 Ti", { 32, 1 } },
+ { "GeForce GTX 980", { 64, 1 } },
+ { "GeForce GTX TITAN", { 256, 1 } },
+ { "GeForce GTX TITAN Black", { 32, 1 } },
+ { "GeForce GTX TITAN X", { 64, 1 } },
+ { "TITAN X (Pascal)", { 32, 1 } },
+ { "Tesla K20m", { 256, 1 } },
+ { "Tesla K40m", { 256, 1 } },
+ { "default", { 128, 1 } },
+ }
+ },
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { 128, 1 } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/database/kernels/xgemv/xgemv_6464.hpp b/src/database/kernels/xgemv/xgemv_6464.hpp
new file mode 100644
index 00000000..50dc8ea0
--- /dev/null
+++ b/src/database/kernels/xgemv/xgemv_6464.hpp
@@ -0,0 +1,61 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Xgemv6464' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry XgemvComplexDouble = {
+ "Xgemv", Precision::kComplexDouble, {"WGS1", "WPT1"}, {
+ { // AMD GPUs
+ kDeviceTypeGPU, "AMD", {
+ { "AMD Radeon R9 M370X Compute Engine", { 64, 1 } },
+ { "Ellesmere", { 32, 1 } },
+ { "Fiji", { 64, 1 } },
+ { "Hawaii", { 64, 1 } },
+ { "Oland", { 256, 1 } },
+ { "Pitcairn", { 256, 1 } },
+ { "Tahiti", { 256, 1 } },
+ { "Tonga", { 64, 1 } },
+ { "default", { 64, 1 } },
+ }
+ },
+ { // Intel CPUs
+ kDeviceTypeCPU, "Intel", {
+ { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 64, 4 } },
+ { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 64, 1 } },
+ { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 128, 2 } },
+ { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 64, 4 } },
+ { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 4 } },
+ { "default", { 64, 4 } },
+ }
+ },
+ { // Intel accelerators
+ kDeviceTypeAccelerator, "Intel", {
+ { "Intel(R) Many Integrated Core Acceleration Card", { 64, 1 } },
+ { "default", { 64, 1 } },
+ }
+ },
+ { // NVIDIA GPUs
+ kDeviceTypeGPU, "NVIDIA", {
+ { "GRID K520", { 128, 1 } },
+ { "GeForce GTX 480", { 64, 1 } },
+ { "GeForce GTX 670", { 128, 1 } },
+ { "default", { 128, 1 } },
+ }
+ },
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { 64, 1 } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/database/kernels/xgemv_fast.hpp b/src/database/kernels/xgemv_fast.hpp
deleted file mode 100644
index c3b9103a..00000000
--- a/src/database/kernels/xgemv_fast.hpp
+++ /dev/null
@@ -1,300 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-// Database generator <database.py>
-//
-// This file populates the database with best-found tuning parameters for the 'Xgemv_Fast' kernels.
-//
-// =================================================================================================
-
-namespace clblast {
-namespace database {
-// =================================================================================================
-
-const Database::DatabaseEntry XgemvFastHalf = {
- "XgemvFast", Precision::kHalf, {"VW2", "WGS2", "WPT2"}, {
- { // AMD GPUs
- kDeviceTypeGPU, "AMD", {
- { "Ellesmere", { 1, 32, 1 } },
- { "default", { 1, 32, 1 } },
- }
- },
- { // Intel GPUs
- kDeviceTypeGPU, "Intel", {
- { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 1, 16, 1 } },
- { "Intel(R) HD Graphics Skylake ULT GT2", { 4, 64, 4 } },
- { "default", { 1, 16, 1 } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { 1, 16, 1 } },
- }
- },
- }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry XgemvFastSingle = {
- "XgemvFast", Precision::kSingle, {"VW2", "WGS2", "WPT2"}, {
- { // AMD GPUs
- kDeviceTypeGPU, "AMD", {
- { "AMD Radeon R9 M370X Compute Engine", { 1, 128, 1 } },
- { "ATI Radeon HD 6750M", { 2, 64, 2 } },
- { "Ellesmere", { 1, 64, 1 } },
- { "Fiji", { 1, 64, 2 } },
- { "Hawaii", { 1, 64, 1 } },
- { "Oland", { 1, 64, 1 } },
- { "Pitcairn", { 1, 64, 1 } },
- { "Tahiti", { 1, 64, 1 } },
- { "Tonga", { 1, 16, 4 } },
- { "Turks", { 1, 256, 1 } },
- { "default", { 1, 64, 1 } },
- }
- },
- { // Intel CPUs
- kDeviceTypeCPU, "Intel", {
- { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 1, 32, 4 } },
- { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 4, 128, 4 } },
- { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 4, 32, 4 } },
- { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 1, 64, 4 } },
- { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 2, 16, 4 } },
- { "default", { 4, 128, 4 } },
- }
- },
- { // Intel GPUs
- kDeviceTypeGPU, "Intel", {
- { "Intel(R) HD Graphics 530", { 1, 256, 1 } },
- { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 2, 32, 2 } },
- { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 4, 128, 4 } },
- { "Intel(R) HD Graphics IvyBridge M GT2", { 1, 64, 2 } },
- { "Intel(R) HD Graphics Skylake ULT GT2", { 2, 32, 2 } },
- { "Iris", { 1, 128, 2 } },
- { "Iris Pro", { 4, 64, 4 } },
- { "default", { 2, 256, 2 } },
- }
- },
- { // Intel accelerators
- kDeviceTypeAccelerator, "Intel", {
- { "Intel(R) Many Integrated Core Acceleration Card", { 1, 64, 1 } },
- { "default", { 1, 64, 1 } },
- }
- },
- { // NVIDIA GPUs
- kDeviceTypeGPU, "NVIDIA", {
- { "GRID K520", { 2, 256, 2 } },
- { "GeForce GT 650M", { 2, 32, 2 } },
- { "GeForce GTX 1070", { 1, 256, 1 } },
- { "GeForce GTX 1080", { 1, 128, 1 } },
- { "GeForce GTX 480", { 1, 128, 1 } },
- { "GeForce GTX 670", { 2, 256, 2 } },
- { "GeForce GTX 680", { 1, 128, 1 } },
- { "GeForce GTX 750", { 1, 256, 1 } },
- { "GeForce GTX 750 Ti", { 2, 32, 2 } },
- { "GeForce GTX 980", { 1, 256, 1 } },
- { "GeForce GTX TITAN", { 1, 256, 1 } },
- { "GeForce GTX TITAN Black", { 1, 256, 1 } },
- { "GeForce GTX TITAN X", { 1, 64, 1 } },
- { "TITAN X (Pascal)", { 1, 64, 1 } },
- { "Tesla K20m", { 1, 256, 1 } },
- { "Tesla K40m", { 1, 256, 1 } },
- { "default", { 1, 256, 1 } },
- }
- },
- { // QUALCOMM GPUs
- kDeviceTypeGPU, "QUALCOMM", {
- { "QUALCOMM Adreno(TM)", { 1, 64, 4 } },
- { "default", { 1, 64, 4 } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { 1, 64, 1 } },
- }
- },
- }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry XgemvFastComplexSingle = {
- "XgemvFast", Precision::kComplexSingle, {"VW2", "WGS2", "WPT2"}, {
- { // AMD GPUs
- kDeviceTypeGPU, "AMD", {
- { "AMD Radeon R9 M370X Compute Engine", { 2, 256, 2 } },
- { "ATI Radeon HD 6750M", { 1, 128, 1 } },
- { "Ellesmere", { 1, 64, 1 } },
- { "Fiji", { 1, 16, 1 } },
- { "Hawaii", { 1, 64, 1 } },
- { "Oland", { 1, 64, 1 } },
- { "Pitcairn", { 1, 64, 1 } },
- { "Tahiti", { 1, 128, 1 } },
- { "Tonga", { 2, 32, 2 } },
- { "Turks", { 1, 16, 1 } },
- { "default", { 1, 64, 1 } },
- }
- },
- { // Intel CPUs
- kDeviceTypeCPU, "Intel", {
- { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 2, 64, 4 } },
- { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1, 128, 2 } },
- { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 2, 128, 2 } },
- { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 4, 64, 4 } },
- { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 16, 4 } },
- { "default", { 1, 64, 2 } },
- }
- },
- { // Intel GPUs
- kDeviceTypeGPU, "Intel", {
- { "Intel(R) HD Graphics 530", { 2, 128, 2 } },
- { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 1, 32, 2 } },
- { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 2, 128, 2 } },
- { "Intel(R) HD Graphics IvyBridge M GT2", { 1, 32, 4 } },
- { "Intel(R) HD Graphics Skylake ULT GT2", { 1, 32, 1 } },
- { "Iris", { 1, 64, 1 } },
- { "Iris Pro", { 4, 128, 4 } },
- { "default", { 1, 64, 1 } },
- }
- },
- { // Intel accelerators
- kDeviceTypeAccelerator, "Intel", {
- { "Intel(R) Many Integrated Core Acceleration Card", { 1, 64, 1 } },
- { "default", { 1, 64, 1 } },
- }
- },
- { // NVIDIA GPUs
- kDeviceTypeGPU, "NVIDIA", {
- { "GRID K520", { 1, 256, 1 } },
- { "GeForce GTX 1070", { 1, 64, 1 } },
- { "GeForce GTX 480", { 1, 64, 1 } },
- { "GeForce GTX 670", { 1, 64, 1 } },
- { "GeForce GTX 680", { 1, 64, 1 } },
- { "default", { 1, 64, 1 } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { 1, 64, 1 } },
- }
- },
- }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry XgemvFastDouble = {
- "XgemvFast", Precision::kDouble, {"VW2", "WGS2", "WPT2"}, {
- { // AMD GPUs
- kDeviceTypeGPU, "AMD", {
- { "AMD Radeon R9 M370X Compute Engine", { 1, 256, 1 } },
- { "Ellesmere", { 1, 128, 1 } },
- { "Fiji", { 1, 32, 1 } },
- { "Hawaii", { 1, 64, 1 } },
- { "Oland", { 1, 64, 1 } },
- { "Pitcairn", { 1, 64, 1 } },
- { "Tahiti", { 1, 64, 1 } },
- { "Tonga", { 2, 32, 2 } },
- { "default", { 1, 64, 1 } },
- }
- },
- { // Intel CPUs
- kDeviceTypeCPU, "Intel", {
- { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 1, 64, 4 } },
- { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 4, 128, 4 } },
- { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 1, 16, 1 } },
- { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 1, 64, 4 } },
- { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 1, 16, 4 } },
- { "default", { 1, 64, 4 } },
- }
- },
- { // Intel accelerators
- kDeviceTypeAccelerator, "Intel", {
- { "Intel(R) Many Integrated Core Acceleration Card", { 1, 64, 1 } },
- { "default", { 1, 64, 1 } },
- }
- },
- { // NVIDIA GPUs
- kDeviceTypeGPU, "NVIDIA", {
- { "GRID K520", { 1, 256, 1 } },
- { "GeForce GTX 1070", { 1, 256, 1 } },
- { "GeForce GTX 1080", { 1, 32, 2 } },
- { "GeForce GTX 480", { 1, 64, 1 } },
- { "GeForce GTX 670", { 1, 128, 1 } },
- { "GeForce GTX 680", { 1, 128, 1 } },
- { "GeForce GTX 750", { 2, 256, 2 } },
- { "GeForce GTX 750 Ti", { 1, 32, 2 } },
- { "GeForce GTX 980", { 1, 64, 1 } },
- { "GeForce GTX TITAN", { 1, 256, 1 } },
- { "GeForce GTX TITAN Black", { 1, 256, 1 } },
- { "GeForce GTX TITAN X", { 1, 128, 1 } },
- { "TITAN X (Pascal)", { 1, 32, 1 } },
- { "Tesla K20m", { 1, 128, 1 } },
- { "Tesla K40m", { 1, 256, 1 } },
- { "default", { 1, 256, 1 } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { 1, 64, 1 } },
- }
- },
- }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry XgemvFastComplexDouble = {
- "XgemvFast", Precision::kComplexDouble, {"VW2", "WGS2", "WPT2"}, {
- { // AMD GPUs
- kDeviceTypeGPU, "AMD", {
- { "AMD Radeon R9 M370X Compute Engine", { 1, 256, 1 } },
- { "Ellesmere", { 1, 16, 1 } },
- { "Fiji", { 1, 16, 1 } },
- { "Hawaii", { 1, 64, 1 } },
- { "Oland", { 1, 256, 1 } },
- { "Pitcairn", { 1, 64, 1 } },
- { "Tahiti", { 1, 64, 1 } },
- { "Tonga", { 1, 32, 1 } },
- { "default", { 1, 64, 1 } },
- }
- },
- { // Intel CPUs
- kDeviceTypeCPU, "Intel", {
- { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 32, 4 } },
- { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 2, 64, 4 } },
- { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 1, 64, 4 } },
- { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 4, 64, 4 } },
- { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 1, 16, 2 } },
- { "default", { 4, 64, 4 } },
- }
- },
- { // Intel accelerators
- kDeviceTypeAccelerator, "Intel", {
- { "Intel(R) Many Integrated Core Acceleration Card", { 1, 64, 1 } },
- { "default", { 1, 64, 1 } },
- }
- },
- { // NVIDIA GPUs
- kDeviceTypeGPU, "NVIDIA", {
- { "GRID K520", { 1, 128, 1 } },
- { "GeForce GTX 480", { 1, 64, 1 } },
- { "GeForce GTX 670", { 1, 64, 1 } },
- { "default", { 1, 64, 1 } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { 1, 64, 1 } },
- }
- },
- }
-};
-
-// =================================================================================================
-} // namespace database
-} // namespace clblast
diff --git a/src/database/kernels/xgemv_fast/xgemv_fast.hpp b/src/database/kernels/xgemv_fast/xgemv_fast.hpp
new file mode 100644
index 00000000..5cc2ca6e
--- /dev/null
+++ b/src/database/kernels/xgemv_fast/xgemv_fast.hpp
@@ -0,0 +1,14 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Xgemv_Fast' kernels.
+//
+// =================================================================================================
+
+#include "database/kernels/xgemv_fast/xgemv_fast_16.hpp"
+#include "database/kernels/xgemv_fast/xgemv_fast_32.hpp"
+#include "database/kernels/xgemv_fast/xgemv_fast_3232.hpp"
+#include "database/kernels/xgemv_fast/xgemv_fast_64.hpp"
+#include "database/kernels/xgemv_fast/xgemv_fast_6464.hpp"
diff --git a/src/database/kernels/xgemv_fast/xgemv_fast_16.hpp b/src/database/kernels/xgemv_fast/xgemv_fast_16.hpp
new file mode 100644
index 00000000..6728e713
--- /dev/null
+++ b/src/database/kernels/xgemv_fast/xgemv_fast_16.hpp
@@ -0,0 +1,37 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Xgemv_Fast16' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry XgemvFastHalf = {
+ "XgemvFast", Precision::kHalf, {"VW2", "WGS2", "WPT2"}, {
+ { // AMD GPUs
+ kDeviceTypeGPU, "AMD", {
+ { "Ellesmere", { 1, 32, 1 } },
+ { "default", { 1, 32, 1 } },
+ }
+ },
+ { // Intel GPUs
+ kDeviceTypeGPU, "Intel", {
+ { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 1, 16, 1 } },
+ { "Intel(R) HD Graphics Skylake ULT GT2", { 4, 64, 4 } },
+ { "default", { 1, 16, 1 } },
+ }
+ },
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { 1, 16, 1 } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/database/kernels/xgemv_fast/xgemv_fast_32.hpp b/src/database/kernels/xgemv_fast/xgemv_fast_32.hpp
new file mode 100644
index 00000000..eae35316
--- /dev/null
+++ b/src/database/kernels/xgemv_fast/xgemv_fast_32.hpp
@@ -0,0 +1,94 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Xgemv_Fast32' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry XgemvFastSingle = {
+ "XgemvFast", Precision::kSingle, {"VW2", "WGS2", "WPT2"}, {
+ { // AMD GPUs
+ kDeviceTypeGPU, "AMD", {
+ { "AMD Radeon R9 M370X Compute Engine", { 1, 128, 1 } },
+ { "ATI Radeon HD 6750M", { 2, 64, 2 } },
+ { "Ellesmere", { 1, 64, 1 } },
+ { "Fiji", { 1, 64, 2 } },
+ { "Hawaii", { 1, 64, 1 } },
+ { "Oland", { 1, 64, 1 } },
+ { "Pitcairn", { 1, 64, 1 } },
+ { "Tahiti", { 1, 64, 1 } },
+ { "Tonga", { 1, 16, 4 } },
+ { "Turks", { 1, 256, 1 } },
+ { "default", { 1, 64, 1 } },
+ }
+ },
+ { // Intel CPUs
+ kDeviceTypeCPU, "Intel", {
+ { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 1, 32, 4 } },
+ { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 4, 128, 4 } },
+ { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 4, 32, 4 } },
+ { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 1, 64, 4 } },
+ { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 2, 16, 4 } },
+ { "default", { 4, 128, 4 } },
+ }
+ },
+ { // Intel GPUs
+ kDeviceTypeGPU, "Intel", {
+ { "Intel(R) HD Graphics 530", { 1, 256, 1 } },
+ { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 2, 32, 2 } },
+ { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 4, 128, 4 } },
+ { "Intel(R) HD Graphics IvyBridge M GT2", { 1, 64, 2 } },
+ { "Intel(R) HD Graphics Skylake ULT GT2", { 2, 32, 2 } },
+ { "Iris", { 1, 128, 2 } },
+ { "Iris Pro", { 4, 64, 4 } },
+ { "default", { 2, 256, 2 } },
+ }
+ },
+ { // Intel accelerators
+ kDeviceTypeAccelerator, "Intel", {
+ { "Intel(R) Many Integrated Core Acceleration Card", { 1, 64, 1 } },
+ { "default", { 1, 64, 1 } },
+ }
+ },
+ { // NVIDIA GPUs
+ kDeviceTypeGPU, "NVIDIA", {
+ { "GRID K520", { 2, 256, 2 } },
+ { "GeForce GT 650M", { 2, 32, 2 } },
+ { "GeForce GTX 1070", { 1, 256, 1 } },
+ { "GeForce GTX 1080", { 1, 128, 1 } },
+ { "GeForce GTX 480", { 1, 128, 1 } },
+ { "GeForce GTX 670", { 2, 256, 2 } },
+ { "GeForce GTX 680", { 1, 128, 1 } },
+ { "GeForce GTX 750", { 1, 256, 1 } },
+ { "GeForce GTX 750 Ti", { 2, 32, 2 } },
+ { "GeForce GTX 980", { 1, 256, 1 } },
+ { "GeForce GTX TITAN", { 1, 256, 1 } },
+ { "GeForce GTX TITAN Black", { 1, 256, 1 } },
+ { "GeForce GTX TITAN X", { 1, 64, 1 } },
+ { "TITAN X (Pascal)", { 1, 64, 1 } },
+ { "Tesla K20m", { 1, 256, 1 } },
+ { "Tesla K40m", { 1, 256, 1 } },
+ { "default", { 1, 256, 1 } },
+ }
+ },
+ { // QUALCOMM GPUs
+ kDeviceTypeGPU, "QUALCOMM", {
+ { "QUALCOMM Adreno(TM)", { 1, 64, 4 } },
+ { "default", { 1, 64, 4 } },
+ }
+ },
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { 1, 64, 1 } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/database/kernels/xgemv_fast/xgemv_fast_3232.hpp b/src/database/kernels/xgemv_fast/xgemv_fast_3232.hpp
new file mode 100644
index 00000000..c66cdc19
--- /dev/null
+++ b/src/database/kernels/xgemv_fast/xgemv_fast_3232.hpp
@@ -0,0 +1,77 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Xgemv_Fast3232' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry XgemvFastComplexSingle = {
+ "XgemvFast", Precision::kComplexSingle, {"VW2", "WGS2", "WPT2"}, {
+ { // AMD GPUs
+ kDeviceTypeGPU, "AMD", {
+ { "AMD Radeon R9 M370X Compute Engine", { 2, 256, 2 } },
+ { "ATI Radeon HD 6750M", { 1, 128, 1 } },
+ { "Ellesmere", { 1, 64, 1 } },
+ { "Fiji", { 1, 16, 1 } },
+ { "Hawaii", { 1, 64, 1 } },
+ { "Oland", { 1, 64, 1 } },
+ { "Pitcairn", { 1, 64, 1 } },
+ { "Tahiti", { 1, 128, 1 } },
+ { "Tonga", { 2, 32, 2 } },
+ { "Turks", { 1, 16, 1 } },
+ { "default", { 1, 64, 1 } },
+ }
+ },
+ { // Intel CPUs
+ kDeviceTypeCPU, "Intel", {
+ { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 2, 64, 4 } },
+ { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1, 128, 2 } },
+ { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 2, 128, 2 } },
+ { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 4, 64, 4 } },
+ { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 16, 4 } },
+ { "default", { 1, 64, 2 } },
+ }
+ },
+ { // Intel GPUs
+ kDeviceTypeGPU, "Intel", {
+ { "Intel(R) HD Graphics 530", { 2, 128, 2 } },
+ { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 1, 32, 2 } },
+ { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 2, 128, 2 } },
+ { "Intel(R) HD Graphics IvyBridge M GT2", { 1, 32, 4 } },
+ { "Intel(R) HD Graphics Skylake ULT GT2", { 1, 32, 1 } },
+ { "Iris", { 1, 64, 1 } },
+ { "Iris Pro", { 4, 128, 4 } },
+ { "default", { 1, 64, 1 } },
+ }
+ },
+ { // Intel accelerators
+ kDeviceTypeAccelerator, "Intel", {
+ { "Intel(R) Many Integrated Core Acceleration Card", { 1, 64, 1 } },
+ { "default", { 1, 64, 1 } },
+ }
+ },
+ { // NVIDIA GPUs
+ kDeviceTypeGPU, "NVIDIA", {
+ { "GRID K520", { 1, 256, 1 } },
+ { "GeForce GTX 1070", { 1, 64, 1 } },
+ { "GeForce GTX 480", { 1, 64, 1 } },
+ { "GeForce GTX 670", { 1, 64, 1 } },
+ { "GeForce GTX 680", { 1, 64, 1 } },
+ { "default", { 1, 64, 1 } },
+ }
+ },
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { 1, 64, 1 } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/database/kernels/xgemv_fast/xgemv_fast_64.hpp b/src/database/kernels/xgemv_fast/xgemv_fast_64.hpp
new file mode 100644
index 00000000..53692530
--- /dev/null
+++ b/src/database/kernels/xgemv_fast/xgemv_fast_64.hpp
@@ -0,0 +1,73 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Xgemv_Fast64' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry XgemvFastDouble = {
+ "XgemvFast", Precision::kDouble, {"VW2", "WGS2", "WPT2"}, {
+ { // AMD GPUs
+ kDeviceTypeGPU, "AMD", {
+ { "AMD Radeon R9 M370X Compute Engine", { 1, 256, 1 } },
+ { "Ellesmere", { 1, 128, 1 } },
+ { "Fiji", { 1, 32, 1 } },
+ { "Hawaii", { 1, 64, 1 } },
+ { "Oland", { 1, 64, 1 } },
+ { "Pitcairn", { 1, 64, 1 } },
+ { "Tahiti", { 1, 64, 1 } },
+ { "Tonga", { 2, 32, 2 } },
+ { "default", { 1, 64, 1 } },
+ }
+ },
+ { // Intel CPUs
+ kDeviceTypeCPU, "Intel", {
+ { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 1, 64, 4 } },
+ { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 4, 128, 4 } },
+ { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 1, 16, 1 } },
+ { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 1, 64, 4 } },
+ { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 1, 16, 4 } },
+ { "default", { 1, 64, 4 } },
+ }
+ },
+ { // Intel accelerators
+ kDeviceTypeAccelerator, "Intel", {
+ { "Intel(R) Many Integrated Core Acceleration Card", { 1, 64, 1 } },
+ { "default", { 1, 64, 1 } },
+ }
+ },
+ { // NVIDIA GPUs
+ kDeviceTypeGPU, "NVIDIA", {
+ { "GRID K520", { 1, 256, 1 } },
+ { "GeForce GTX 1070", { 1, 256, 1 } },
+ { "GeForce GTX 1080", { 1, 32, 2 } },
+ { "GeForce GTX 480", { 1, 64, 1 } },
+ { "GeForce GTX 670", { 1, 128, 1 } },
+ { "GeForce GTX 680", { 1, 128, 1 } },
+ { "GeForce GTX 750", { 2, 256, 2 } },
+ { "GeForce GTX 750 Ti", { 1, 32, 2 } },
+ { "GeForce GTX 980", { 1, 64, 1 } },
+ { "GeForce GTX TITAN", { 1, 256, 1 } },
+ { "GeForce GTX TITAN Black", { 1, 256, 1 } },
+ { "GeForce GTX TITAN X", { 1, 128, 1 } },
+ { "TITAN X (Pascal)", { 1, 32, 1 } },
+ { "Tesla K20m", { 1, 128, 1 } },
+ { "Tesla K40m", { 1, 256, 1 } },
+ { "default", { 1, 256, 1 } },
+ }
+ },
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { 1, 64, 1 } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/database/kernels/xgemv_fast/xgemv_fast_6464.hpp b/src/database/kernels/xgemv_fast/xgemv_fast_6464.hpp
new file mode 100644
index 00000000..fdf3d508
--- /dev/null
+++ b/src/database/kernels/xgemv_fast/xgemv_fast_6464.hpp
@@ -0,0 +1,61 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Xgemv_Fast6464' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry XgemvFastComplexDouble = {
+ "XgemvFast", Precision::kComplexDouble, {"VW2", "WGS2", "WPT2"}, {
+ { // AMD GPUs
+ kDeviceTypeGPU, "AMD", {
+ { "AMD Radeon R9 M370X Compute Engine", { 1, 256, 1 } },
+ { "Ellesmere", { 1, 16, 1 } },
+ { "Fiji", { 1, 16, 1 } },
+ { "Hawaii", { 1, 64, 1 } },
+ { "Oland", { 1, 256, 1 } },
+ { "Pitcairn", { 1, 64, 1 } },
+ { "Tahiti", { 1, 64, 1 } },
+ { "Tonga", { 1, 32, 1 } },
+ { "default", { 1, 64, 1 } },
+ }
+ },
+ { // Intel CPUs
+ kDeviceTypeCPU, "Intel", {
+ { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 32, 4 } },
+ { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 2, 64, 4 } },
+ { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 1, 64, 4 } },
+ { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 4, 64, 4 } },
+ { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 1, 16, 2 } },
+ { "default", { 4, 64, 4 } },
+ }
+ },
+ { // Intel accelerators
+ kDeviceTypeAccelerator, "Intel", {
+ { "Intel(R) Many Integrated Core Acceleration Card", { 1, 64, 1 } },
+ { "default", { 1, 64, 1 } },
+ }
+ },
+ { // NVIDIA GPUs
+ kDeviceTypeGPU, "NVIDIA", {
+ { "GRID K520", { 1, 128, 1 } },
+ { "GeForce GTX 480", { 1, 64, 1 } },
+ { "GeForce GTX 670", { 1, 64, 1 } },
+ { "default", { 1, 64, 1 } },
+ }
+ },
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { 1, 64, 1 } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/database/kernels/xgemv_fast_rot.hpp b/src/database/kernels/xgemv_fast_rot.hpp
deleted file mode 100644
index 7e5905e4..00000000
--- a/src/database/kernels/xgemv_fast_rot.hpp
+++ /dev/null
@@ -1,213 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-// Database generator <database.py>
-//
-// This file populates the database with best-found tuning parameters for the 'Xgemv_Fast_Rot' kernels.
-//
-// =================================================================================================
-
-namespace clblast {
-namespace database {
-// =================================================================================================
-
-const Database::DatabaseEntry XgemvFastRotHalf = {
- "XgemvFastRot", Precision::kHalf, {"VW3", "WGS3", "WPT3"}, {
- { // AMD GPUs
- kDeviceTypeGPU, "AMD", {
- { "Ellesmere", { 8, 32, 32 } },
- { "default", { 8, 32, 32 } },
- }
- },
- { // Intel GPUs
- kDeviceTypeGPU, "Intel", {
- { "Intel(R) HD Graphics Skylake ULT GT2", { 8, 128, 32 } },
- { "default", { 8, 128, 32 } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { 8, 128, 32 } },
- }
- },
- }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry XgemvFastRotSingle = {
- "XgemvFastRot", Precision::kSingle, {"VW3", "WGS3", "WPT3"}, {
- { // AMD GPUs
- kDeviceTypeGPU, "AMD", {
- { "AMD Radeon R9 M370X Compute Engine", { 8, 64, 32 } },
- { "ATI Radeon HD 6750M", { 8, 128, 16 } },
- { "Ellesmere", { 8, 32, 32 } },
- { "Fiji", { 4, 32, 16 } },
- { "Tonga", { 8, 128, 32 } },
- { "Turks", { 8, 128, 16 } },
- { "default", { 8, 32, 32 } },
- }
- },
- { // Intel CPUs
- kDeviceTypeCPU, "Intel", {
- { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 32, 32 } },
- { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 8, 128, 8 } },
- { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 32, 32 } },
- { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 8, 16, 8 } },
- { "default", { 8, 32, 32 } },
- }
- },
- { // Intel GPUs
- kDeviceTypeGPU, "Intel", {
- { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 8, 64, 32 } },
- { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 4, 64, 16 } },
- { "Intel(R) HD Graphics IvyBridge M GT2", { 2, 32, 16 } },
- { "Intel(R) HD Graphics Skylake ULT GT2", { 4, 64, 16 } },
- { "Iris Pro", { 4, 16, 16 } },
- { "default", { 4, 64, 16 } },
- }
- },
- { // NVIDIA GPUs
- kDeviceTypeGPU, "NVIDIA", {
- { "GeForce GT 650M", { 8, 32, 16 } },
- { "GeForce GTX 1080", { 8, 32, 32 } },
- { "GeForce GTX 750 Ti", { 8, 32, 32 } },
- { "GeForce GTX TITAN", { 1, 16, 16 } },
- { "GeForce GTX TITAN Black", { 4, 128, 16 } },
- { "TITAN X (Pascal)", { 8, 64, 32 } },
- { "default", { 8, 32, 32 } },
- }
- },
- { // QUALCOMM GPUs
- kDeviceTypeGPU, "QUALCOMM", {
- { "QUALCOMM Adreno(TM)", { 4, 64, 16 } },
- { "default", { 4, 64, 16 } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { 8, 32, 32 } },
- }
- },
- }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry XgemvFastRotComplexSingle = {
- "XgemvFastRot", Precision::kComplexSingle, {"VW3", "WGS3", "WPT3"}, {
- { // AMD GPUs
- kDeviceTypeGPU, "AMD", {
- { "AMD Radeon R9 M370X Compute Engine", { 8, 16, 16 } },
- { "ATI Radeon HD 6750M", { 8, 32, 8 } },
- { "Ellesmere", { 2, 32, 16 } },
- { "Fiji", { 4, 32, 32 } },
- { "Tonga", { 4, 32, 32 } },
- { "Turks", { 4, 32, 8 } },
- { "default", { 8, 16, 16 } },
- }
- },
- { // Intel CPUs
- kDeviceTypeCPU, "Intel", {
- { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 32, 32 } },
- { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 8, 32, 8 } },
- { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 32, 32 } },
- { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 4, 16, 16 } },
- { "default", { 4, 32, 32 } },
- }
- },
- { // Intel GPUs
- kDeviceTypeGPU, "Intel", {
- { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 2, 16, 16 } },
- { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 4, 128, 8 } },
- { "Intel(R) HD Graphics IvyBridge M GT2", { 4, 32, 8 } },
- { "Intel(R) HD Graphics Skylake ULT GT2", { 4, 64, 16 } },
- { "Iris Pro", { 4, 16, 16 } },
- { "default", { 2, 32, 8 } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { 4, 16, 16 } },
- }
- },
- }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry XgemvFastRotDouble = {
- "XgemvFastRot", Precision::kDouble, {"VW3", "WGS3", "WPT3"}, {
- { // AMD GPUs
- kDeviceTypeGPU, "AMD", {
- { "AMD Radeon R9 M370X Compute Engine", { 4, 16, 16 } },
- { "Ellesmere", { 4, 16, 16 } },
- { "Fiji", { 4, 32, 32 } },
- { "Tonga", { 4, 16, 16 } },
- { "default", { 4, 16, 16 } },
- }
- },
- { // Intel CPUs
- kDeviceTypeCPU, "Intel", {
- { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 32, 32 } },
- { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 8, 16, 8 } },
- { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 32, 32 } },
- { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 8, 16, 8 } },
- { "default", { 8, 32, 32 } },
- }
- },
- { // NVIDIA GPUs
- kDeviceTypeGPU, "NVIDIA", {
- { "GeForce GTX 1080", { 8, 32, 32 } },
- { "GeForce GTX 750 Ti", { 4, 32, 16 } },
- { "GeForce GTX TITAN", { 1, 16, 16 } },
- { "GeForce GTX TITAN Black", { 1, 16, 16 } },
- { "TITAN X (Pascal)", { 8, 32, 32 } },
- { "default", { 4, 32, 16 } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { 4, 16, 16 } },
- }
- },
- }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry XgemvFastRotComplexDouble = {
- "XgemvFastRot", Precision::kComplexDouble, {"VW3", "WGS3", "WPT3"}, {
- { // AMD GPUs
- kDeviceTypeGPU, "AMD", {
- { "AMD Radeon R9 M370X Compute Engine", { 4, 32, 16 } },
- { "Ellesmere", { 4, 16, 16 } },
- { "Fiji", { 4, 32, 8 } },
- { "Tonga", { 4, 16, 8 } },
- { "default", { 8, 32, 16 } },
- }
- },
- { // Intel CPUs
- kDeviceTypeCPU, "Intel", {
- { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 2, 16, 16 } },
- { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 4, 64, 4 } },
- { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 2, 16, 16 } },
- { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 8, 16, 16 } },
- { "default", { 8, 16, 16 } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { 4, 16, 16 } },
- }
- },
- }
-};
-
-// =================================================================================================
-} // namespace database
-} // namespace clblast
diff --git a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot.hpp b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot.hpp
new file mode 100644
index 00000000..7379eba9
--- /dev/null
+++ b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot.hpp
@@ -0,0 +1,14 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Xgemv_Fast_Rot' kernels.
+//
+// =================================================================================================
+
+#include "database/kernels/xgemv_fast_rot/xgemv_fast_rot_16.hpp"
+#include "database/kernels/xgemv_fast_rot/xgemv_fast_rot_32.hpp"
+#include "database/kernels/xgemv_fast_rot/xgemv_fast_rot_3232.hpp"
+#include "database/kernels/xgemv_fast_rot/xgemv_fast_rot_64.hpp"
+#include "database/kernels/xgemv_fast_rot/xgemv_fast_rot_6464.hpp"
diff --git a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_16.hpp b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_16.hpp
new file mode 100644
index 00000000..8d516141
--- /dev/null
+++ b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_16.hpp
@@ -0,0 +1,36 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Xgemv_Fast_Rot16' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry XgemvFastRotHalf = {
+ "XgemvFastRot", Precision::kHalf, {"VW3", "WGS3", "WPT3"}, {
+ { // AMD GPUs
+ kDeviceTypeGPU, "AMD", {
+ { "Ellesmere", { 8, 32, 32 } },
+ { "default", { 8, 32, 32 } },
+ }
+ },
+ { // Intel GPUs
+ kDeviceTypeGPU, "Intel", {
+ { "Intel(R) HD Graphics Skylake ULT GT2", { 8, 128, 32 } },
+ { "default", { 8, 128, 32 } },
+ }
+ },
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { 8, 128, 32 } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_32.hpp b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_32.hpp
new file mode 100644
index 00000000..71b8b355
--- /dev/null
+++ b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_32.hpp
@@ -0,0 +1,71 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Xgemv_Fast_Rot32' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry XgemvFastRotSingle = {
+ "XgemvFastRot", Precision::kSingle, {"VW3", "WGS3", "WPT3"}, {
+ { // AMD GPUs
+ kDeviceTypeGPU, "AMD", {
+ { "AMD Radeon R9 M370X Compute Engine", { 8, 64, 32 } },
+ { "ATI Radeon HD 6750M", { 8, 128, 16 } },
+ { "Ellesmere", { 8, 32, 32 } },
+ { "Fiji", { 4, 32, 16 } },
+ { "Tonga", { 8, 128, 32 } },
+ { "Turks", { 8, 128, 16 } },
+ { "default", { 8, 32, 32 } },
+ }
+ },
+ { // Intel CPUs
+ kDeviceTypeCPU, "Intel", {
+ { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 32, 32 } },
+ { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 8, 128, 8 } },
+ { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 32, 32 } },
+ { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 8, 16, 8 } },
+ { "default", { 8, 32, 32 } },
+ }
+ },
+ { // Intel GPUs
+ kDeviceTypeGPU, "Intel", {
+ { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 8, 64, 32 } },
+ { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 4, 64, 16 } },
+ { "Intel(R) HD Graphics IvyBridge M GT2", { 2, 32, 16 } },
+ { "Intel(R) HD Graphics Skylake ULT GT2", { 4, 64, 16 } },
+ { "Iris Pro", { 4, 16, 16 } },
+ { "default", { 4, 64, 16 } },
+ }
+ },
+ { // NVIDIA GPUs
+ kDeviceTypeGPU, "NVIDIA", {
+ { "GeForce GT 650M", { 8, 32, 16 } },
+ { "GeForce GTX 1080", { 8, 32, 32 } },
+ { "GeForce GTX 750 Ti", { 8, 32, 32 } },
+ { "GeForce GTX TITAN", { 1, 16, 16 } },
+ { "GeForce GTX TITAN Black", { 4, 128, 16 } },
+ { "TITAN X (Pascal)", { 8, 64, 32 } },
+ { "default", { 8, 32, 32 } },
+ }
+ },
+ { // QUALCOMM GPUs
+ kDeviceTypeGPU, "QUALCOMM", {
+ { "QUALCOMM Adreno(TM)", { 4, 64, 16 } },
+ { "default", { 4, 64, 16 } },
+ }
+ },
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { 8, 32, 32 } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_3232.hpp b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_3232.hpp
new file mode 100644
index 00000000..4fd88fc4
--- /dev/null
+++ b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_3232.hpp
@@ -0,0 +1,54 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Xgemv_Fast_Rot3232' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry XgemvFastRotComplexSingle = {
+ "XgemvFastRot", Precision::kComplexSingle, {"VW3", "WGS3", "WPT3"}, {
+ { // AMD GPUs
+ kDeviceTypeGPU, "AMD", {
+ { "AMD Radeon R9 M370X Compute Engine", { 8, 16, 16 } },
+ { "ATI Radeon HD 6750M", { 8, 32, 8 } },
+ { "Ellesmere", { 2, 32, 16 } },
+ { "Fiji", { 4, 32, 32 } },
+ { "Tonga", { 4, 32, 32 } },
+ { "Turks", { 4, 32, 8 } },
+ { "default", { 8, 16, 16 } },
+ }
+ },
+ { // Intel CPUs
+ kDeviceTypeCPU, "Intel", {
+ { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 32, 32 } },
+ { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 8, 32, 8 } },
+ { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 32, 32 } },
+ { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 4, 16, 16 } },
+ { "default", { 4, 32, 32 } },
+ }
+ },
+ { // Intel GPUs
+ kDeviceTypeGPU, "Intel", {
+ { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 2, 16, 16 } },
+ { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 4, 128, 8 } },
+ { "Intel(R) HD Graphics IvyBridge M GT2", { 4, 32, 8 } },
+ { "Intel(R) HD Graphics Skylake ULT GT2", { 4, 64, 16 } },
+ { "Iris Pro", { 4, 16, 16 } },
+ { "default", { 2, 32, 8 } },
+ }
+ },
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { 4, 16, 16 } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_64.hpp b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_64.hpp
new file mode 100644
index 00000000..66299b56
--- /dev/null
+++ b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_64.hpp
@@ -0,0 +1,52 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Xgemv_Fast_Rot64' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry XgemvFastRotDouble = {
+ "XgemvFastRot", Precision::kDouble, {"VW3", "WGS3", "WPT3"}, {
+ { // AMD GPUs
+ kDeviceTypeGPU, "AMD", {
+ { "AMD Radeon R9 M370X Compute Engine", { 4, 16, 16 } },
+ { "Ellesmere", { 4, 16, 16 } },
+ { "Fiji", { 4, 32, 32 } },
+ { "Tonga", { 4, 16, 16 } },
+ { "default", { 4, 16, 16 } },
+ }
+ },
+ { // Intel CPUs
+ kDeviceTypeCPU, "Intel", {
+ { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 32, 32 } },
+ { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 8, 16, 8 } },
+ { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 32, 32 } },
+ { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 8, 16, 8 } },
+ { "default", { 8, 32, 32 } },
+ }
+ },
+ { // NVIDIA GPUs
+ kDeviceTypeGPU, "NVIDIA", {
+ { "GeForce GTX 1080", { 8, 32, 32 } },
+ { "GeForce GTX 750 Ti", { 4, 32, 16 } },
+ { "GeForce GTX TITAN", { 1, 16, 16 } },
+ { "GeForce GTX TITAN Black", { 1, 16, 16 } },
+ { "TITAN X (Pascal)", { 8, 32, 32 } },
+ { "default", { 4, 32, 16 } },
+ }
+ },
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { 4, 16, 16 } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_6464.hpp b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_6464.hpp
new file mode 100644
index 00000000..bc1964ff
--- /dev/null
+++ b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_6464.hpp
@@ -0,0 +1,42 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Xgemv_Fast_Rot6464' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry XgemvFastRotComplexDouble = {
+ "XgemvFastRot", Precision::kComplexDouble, {"VW3", "WGS3", "WPT3"}, {
+ { // AMD GPUs
+ kDeviceTypeGPU, "AMD", {
+ { "AMD Radeon R9 M370X Compute Engine", { 4, 32, 16 } },
+ { "Ellesmere", { 4, 16, 16 } },
+ { "Fiji", { 4, 32, 8 } },
+ { "Tonga", { 4, 16, 8 } },
+ { "default", { 8, 32, 16 } },
+ }
+ },
+ { // Intel CPUs
+ kDeviceTypeCPU, "Intel", {
+ { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 2, 16, 16 } },
+ { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 4, 64, 4 } },
+ { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 2, 16, 16 } },
+ { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 8, 16, 16 } },
+ { "default", { 8, 16, 16 } },
+ }
+ },
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { 4, 16, 16 } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/database/kernels/xger.hpp b/src/database/kernels/xger.hpp
deleted file mode 100644
index e17396f6..00000000
--- a/src/database/kernels/xger.hpp
+++ /dev/null
@@ -1,316 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-// Database generator <database.py>
-//
-// This file populates the database with best-found tuning parameters for the 'Xger' kernels.
-//
-// =================================================================================================
-
-namespace clblast {
-namespace database {
-// =================================================================================================
-
-const Database::DatabaseEntry XgerHalf = {
- "Xger", Precision::kHalf, {"WGS1", "WGS2", "WPT"}, {
- { // AMD GPUs
- kDeviceTypeGPU, "AMD", {
- { "Ellesmere", { 64, 1, 2 } },
- { "default", { 64, 1, 2 } },
- }
- },
- { // Intel GPUs
- kDeviceTypeGPU, "Intel", {
- { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 256, 1, 2 } },
- { "Intel(R) HD Graphics Skylake ULT GT2", { 64, 1, 4 } },
- { "default", { 4, 8, 2 } },
- }
- },
- { // QUALCOMM GPUs
- kDeviceTypeGPU, "QUALCOMM", {
- { "QUALCOMM Adreno(TM)", { 64, 4, 2 } },
- { "default", { 64, 4, 2 } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { 64, 1, 2 } },
- }
- },
- }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry XgerSingle = {
- "Xger", Precision::kSingle, {"WGS1", "WGS2", "WPT"}, {
- { // AMD GPUs
- kDeviceTypeGPU, "AMD", {
- { "AMD Radeon R9 M370X Compute Engine", { 256, 1, 1 } },
- { "ATI Radeon HD 6750M", { 16, 16, 4 } },
- { "Ellesmere", { 64, 4, 2 } },
- { "Fiji", { 256, 1, 1 } },
- { "Hawaii", { 64, 2, 1 } },
- { "Oland", { 32, 4, 2 } },
- { "Pitcairn", { 64, 1, 1 } },
- { "Tahiti", { 256, 1, 1 } },
- { "Tonga", { 256, 1, 2 } },
- { "Turks", { 64, 4, 2 } },
- { "default", { 16, 16, 1 } },
- }
- },
- { // ARM GPUs
- kDeviceTypeGPU, "ARM", {
- { "Mali-T628", { 64, 4, 4 } },
- { "default", { 64, 4, 4 } },
- }
- },
- { // Intel CPUs
- kDeviceTypeCPU, "Intel", {
- { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 4, 4 } },
- { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 128, 2, 4 } },
- { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 256, 16, 4 } },
- { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 256, 4, 4 } },
- { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 128, 1, 4 } },
- { "default", { 128, 8, 4 } },
- }
- },
- { // Intel GPUs
- kDeviceTypeGPU, "Intel", {
- { "Intel(R) HD Graphics 530", { 32, 1, 2 } },
- { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 256, 2, 2 } },
- { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 128, 1, 2 } },
- { "Intel(R) HD Graphics IvyBridge M GT2", { 64, 1, 4 } },
- { "Intel(R) HD Graphics Skylake ULT GT2", { 32, 4, 4 } },
- { "Iris Pro", { 64, 1, 4 } },
- { "default", { 32, 4, 2 } },
- }
- },
- { // NVIDIA GPUs
- kDeviceTypeGPU, "NVIDIA", {
- { "GRID K520", { 128, 1, 2 } },
- { "GeForce GT 650M", { 32, 16, 4 } },
- { "GeForce GTX 1070", { 512, 1, 1 } },
- { "GeForce GTX 1080", { 16, 4, 1 } },
- { "GeForce GTX 480", { 256, 1, 4 } },
- { "GeForce GTX 670", { 32, 8, 2 } },
- { "GeForce GTX 680", { 128, 1, 4 } },
- { "GeForce GTX 750", { 64, 16, 4 } },
- { "GeForce GTX 750 Ti", { 64, 1, 2 } },
- { "GeForce GTX TITAN", { 32, 4, 2 } },
- { "GeForce GTX TITAN Black", { 32, 4, 2 } },
- { "TITAN X (Pascal)", { 512, 2, 1 } },
- { "default", { 128, 1, 2 } },
- }
- },
- { // QUALCOMM GPUs
- kDeviceTypeGPU, "QUALCOMM", {
- { "QUALCOMM Adreno(TM)", { 128, 1, 2 } },
- { "default", { 128, 1, 2 } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { 32, 4, 2 } },
- }
- },
- }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry XgerComplexSingle = {
- "Xger", Precision::kComplexSingle, {"WGS1", "WGS2", "WPT"}, {
- { // AMD GPUs
- kDeviceTypeGPU, "AMD", {
- { "AMD Radeon R9 M370X Compute Engine", { 64, 4, 1 } },
- { "ATI Radeon HD 6750M", { 16, 16, 1 } },
- { "Ellesmere", { 16, 8, 2 } },
- { "Fiji", { 128, 2, 1 } },
- { "Hawaii", { 64, 1, 2 } },
- { "Oland", { 4, 8, 1 } },
- { "Pitcairn", { 128, 2, 1 } },
- { "Tahiti", { 64, 2, 1 } },
- { "Tonga", { 64, 1, 1 } },
- { "Turks", { 128, 2, 1 } },
- { "default", { 128, 2, 1 } },
- }
- },
- { // ARM GPUs
- kDeviceTypeGPU, "ARM", {
- { "Mali-T628", { 128, 1, 1 } },
- { "default", { 128, 1, 1 } },
- }
- },
- { // Intel CPUs
- kDeviceTypeCPU, "Intel", {
- { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 128, 2, 4 } },
- { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 256, 1, 4 } },
- { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 256, 8, 4 } },
- { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 256, 2, 4 } },
- { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 512, 4, 2 } },
- { "default", { 256, 2, 4 } },
- }
- },
- { // Intel GPUs
- kDeviceTypeGPU, "Intel", {
- { "Intel(R) HD Graphics 530", { 32, 1, 2 } },
- { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 128, 2, 1 } },
- { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 512, 1, 1 } },
- { "Intel(R) HD Graphics IvyBridge M GT2", { 256, 1, 2 } },
- { "Intel(R) HD Graphics Skylake ULT GT2", { 16, 1, 1 } },
- { "Iris Pro", { 16, 2, 4 } },
- { "default", { 128, 2, 2 } },
- }
- },
- { // NVIDIA GPUs
- kDeviceTypeGPU, "NVIDIA", {
- { "GRID K520", { 64, 4, 2 } },
- { "GeForce GTX 1070", { 16, 64, 2 } },
- { "GeForce GTX 1080", { 32, 2, 1 } },
- { "GeForce GTX 480", { 128, 2, 2 } },
- { "GeForce GTX 670", { 16, 32, 2 } },
- { "GeForce GTX 680", { 32, 4, 2 } },
- { "GeForce GTX 750", { 32, 16, 4 } },
- { "GeForce GTX 750 Ti", { 32, 8, 2 } },
- { "GeForce GTX TITAN", { 16, 16, 2 } },
- { "GeForce GTX TITAN Black", { 16, 16, 2 } },
- { "TITAN X (Pascal)", { 32, 2, 1 } },
- { "default", { 128, 2, 2 } },
- }
- },
- { // QUALCOMM GPUs
- kDeviceTypeGPU, "QUALCOMM", {
- { "QUALCOMM Adreno(TM)", { 64, 1, 4 } },
- { "default", { 64, 1, 4 } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { 64, 2, 2 } },
- }
- },
- }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry XgerDouble = {
- "Xger", Precision::kDouble, {"WGS1", "WGS2", "WPT"}, {
- { // AMD GPUs
- kDeviceTypeGPU, "AMD", {
- { "AMD Radeon R9 M370X Compute Engine", { 32, 4, 1 } },
- { "Ellesmere", { 64, 1, 4 } },
- { "Fiji", { 256, 1, 2 } },
- { "Hawaii", { 32, 4, 2 } },
- { "Oland", { 128, 1, 2 } },
- { "Pitcairn", { 64, 1, 1 } },
- { "Tahiti", { 64, 2, 1 } },
- { "Tonga", { 8, 16, 2 } },
- { "default", { 128, 2, 1 } },
- }
- },
- { // ARM GPUs
- kDeviceTypeGPU, "ARM", {
- { "Mali-T628", { 64, 4, 1 } },
- { "default", { 64, 4, 1 } },
- }
- },
- { // Intel CPUs
- kDeviceTypeCPU, "Intel", {
- { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 256, 1, 4 } },
- { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 512, 16, 1 } },
- { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 256, 1, 4 } },
- { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 256, 4, 4 } },
- { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 512, 8, 2 } },
- { "default", { 256, 1, 4 } },
- }
- },
- { // NVIDIA GPUs
- kDeviceTypeGPU, "NVIDIA", {
- { "GRID K520", { 128, 8, 2 } },
- { "GeForce GTX 1070", { 32, 8, 1 } },
- { "GeForce GTX 1080", { 32, 2, 1 } },
- { "GeForce GTX 480", { 32, 4, 2 } },
- { "GeForce GTX 670", { 32, 32, 2 } },
- { "GeForce GTX 680", { 128, 4, 2 } },
- { "GeForce GTX 750", { 256, 2, 2 } },
- { "GeForce GTX 750 Ti", { 32, 16, 1 } },
- { "GeForce GTX TITAN", { 16, 8, 2 } },
- { "GeForce GTX TITAN Black", { 32, 4, 2 } },
- { "TITAN X (Pascal)", { 32, 2, 1 } },
- { "default", { 128, 1, 2 } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { 128, 1, 2 } },
- }
- },
- }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry XgerComplexDouble = {
- "Xger", Precision::kComplexDouble, {"WGS1", "WGS2", "WPT"}, {
- { // AMD GPUs
- kDeviceTypeGPU, "AMD", {
- { "AMD Radeon R9 M370X Compute Engine", { 64, 1, 1 } },
- { "Ellesmere", { 8, 16, 1 } },
- { "Fiji", { 64, 4, 2 } },
- { "Hawaii", { 128, 1, 1 } },
- { "Oland", { 16, 16, 2 } },
- { "Pitcairn", { 64, 4, 1 } },
- { "Tahiti", { 32, 4, 1 } },
- { "Tonga", { 16, 4, 1 } },
- { "default", { 32, 4, 1 } },
- }
- },
- { // ARM GPUs
- kDeviceTypeGPU, "ARM", {
- { "Mali-T628", { 64, 2, 4 } },
- { "default", { 64, 2, 4 } },
- }
- },
- { // Intel CPUs
- kDeviceTypeCPU, "Intel", {
- { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 128, 4, 4 } },
- { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 512, 4, 2 } },
- { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 256, 8, 4 } },
- { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 512, 2, 2 } },
- { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 256, 1, 2 } },
- { "default", { 256, 2, 2 } },
- }
- },
- { // NVIDIA GPUs
- kDeviceTypeGPU, "NVIDIA", {
- { "GRID K520", { 16, 8, 2 } },
- { "GeForce GTX 1070", { 8, 128, 1 } },
- { "GeForce GTX 1080", { 8, 4, 1 } },
- { "GeForce GTX 480", { 64, 2, 2 } },
- { "GeForce GTX 670", { 8, 16, 2 } },
- { "GeForce GTX 680", { 8, 16, 1 } },
- { "GeForce GTX 750", { 8, 32, 4 } },
- { "GeForce GTX 750 Ti", { 32, 8, 2 } },
- { "GeForce GTX TITAN", { 32, 4, 2 } },
- { "GeForce GTX TITAN Black", { 16, 16, 2 } },
- { "TITAN X (Pascal)", { 4, 8, 1 } },
- { "default", { 16, 8, 2 } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { 64, 2, 2 } },
- }
- },
- }
-};
-
-// =================================================================================================
-} // namespace database
-} // namespace clblast
diff --git a/src/database/kernels/xger/xger.hpp b/src/database/kernels/xger/xger.hpp
new file mode 100644
index 00000000..284d1fc6
--- /dev/null
+++ b/src/database/kernels/xger/xger.hpp
@@ -0,0 +1,14 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Xger' kernels.
+//
+// =================================================================================================
+
+#include "database/kernels/xger/xger_16.hpp"
+#include "database/kernels/xger/xger_32.hpp"
+#include "database/kernels/xger/xger_3232.hpp"
+#include "database/kernels/xger/xger_64.hpp"
+#include "database/kernels/xger/xger_6464.hpp"
diff --git a/src/database/kernels/xger/xger_16.hpp b/src/database/kernels/xger/xger_16.hpp
new file mode 100644
index 00000000..376716b7
--- /dev/null
+++ b/src/database/kernels/xger/xger_16.hpp
@@ -0,0 +1,43 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Xger16' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry XgerHalf = {
+ "Xger", Precision::kHalf, {"WGS1", "WGS2", "WPT"}, {
+ { // AMD GPUs
+ kDeviceTypeGPU, "AMD", {
+ { "Ellesmere", { 64, 1, 2 } },
+ { "default", { 64, 1, 2 } },
+ }
+ },
+ { // Intel GPUs
+ kDeviceTypeGPU, "Intel", {
+ { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 256, 1, 2 } },
+ { "Intel(R) HD Graphics Skylake ULT GT2", { 64, 1, 4 } },
+ { "default", { 4, 8, 2 } },
+ }
+ },
+ { // QUALCOMM GPUs
+ kDeviceTypeGPU, "QUALCOMM", {
+ { "QUALCOMM Adreno(TM)", { 64, 4, 2 } },
+ { "default", { 64, 4, 2 } },
+ }
+ },
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { 64, 1, 2 } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/database/kernels/xger/xger_32.hpp b/src/database/kernels/xger/xger_32.hpp
new file mode 100644
index 00000000..bc18f20e
--- /dev/null
+++ b/src/database/kernels/xger/xger_32.hpp
@@ -0,0 +1,89 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Xger32' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry XgerSingle = {
+ "Xger", Precision::kSingle, {"WGS1", "WGS2", "WPT"}, {
+ { // AMD GPUs
+ kDeviceTypeGPU, "AMD", {
+ { "AMD Radeon R9 M370X Compute Engine", { 256, 1, 1 } },
+ { "ATI Radeon HD 6750M", { 16, 16, 4 } },
+ { "Ellesmere", { 64, 4, 2 } },
+ { "Fiji", { 256, 1, 1 } },
+ { "Hawaii", { 64, 2, 1 } },
+ { "Oland", { 32, 4, 2 } },
+ { "Pitcairn", { 64, 1, 1 } },
+ { "Tahiti", { 256, 1, 1 } },
+ { "Tonga", { 256, 1, 2 } },
+ { "Turks", { 64, 4, 2 } },
+ { "default", { 16, 16, 1 } },
+ }
+ },
+ { // ARM GPUs
+ kDeviceTypeGPU, "ARM", {
+ { "Mali-T628", { 64, 4, 4 } },
+ { "default", { 64, 4, 4 } },
+ }
+ },
+ { // Intel CPUs
+ kDeviceTypeCPU, "Intel", {
+ { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 4, 4 } },
+ { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 128, 2, 4 } },
+ { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 256, 16, 4 } },
+ { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 256, 4, 4 } },
+ { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 128, 1, 4 } },
+ { "default", { 128, 8, 4 } },
+ }
+ },
+ { // Intel GPUs
+ kDeviceTypeGPU, "Intel", {
+ { "Intel(R) HD Graphics 530", { 32, 1, 2 } },
+ { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 256, 2, 2 } },
+ { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 128, 1, 2 } },
+ { "Intel(R) HD Graphics IvyBridge M GT2", { 64, 1, 4 } },
+ { "Intel(R) HD Graphics Skylake ULT GT2", { 32, 4, 4 } },
+ { "Iris Pro", { 64, 1, 4 } },
+ { "default", { 32, 4, 2 } },
+ }
+ },
+ { // NVIDIA GPUs
+ kDeviceTypeGPU, "NVIDIA", {
+ { "GRID K520", { 128, 1, 2 } },
+ { "GeForce GT 650M", { 32, 16, 4 } },
+ { "GeForce GTX 1070", { 512, 1, 1 } },
+ { "GeForce GTX 1080", { 16, 4, 1 } },
+ { "GeForce GTX 480", { 256, 1, 4 } },
+ { "GeForce GTX 670", { 32, 8, 2 } },
+ { "GeForce GTX 680", { 128, 1, 4 } },
+ { "GeForce GTX 750", { 64, 16, 4 } },
+ { "GeForce GTX 750 Ti", { 64, 1, 2 } },
+ { "GeForce GTX TITAN", { 32, 4, 2 } },
+ { "GeForce GTX TITAN Black", { 32, 4, 2 } },
+ { "TITAN X (Pascal)", { 512, 2, 1 } },
+ { "default", { 128, 1, 2 } },
+ }
+ },
+ { // QUALCOMM GPUs
+ kDeviceTypeGPU, "QUALCOMM", {
+ { "QUALCOMM Adreno(TM)", { 128, 1, 2 } },
+ { "default", { 128, 1, 2 } },
+ }
+ },
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { 32, 4, 2 } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/database/kernels/xger/xger_3232.hpp b/src/database/kernels/xger/xger_3232.hpp
new file mode 100644
index 00000000..d2d1587f
--- /dev/null
+++ b/src/database/kernels/xger/xger_3232.hpp
@@ -0,0 +1,88 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Xger3232' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry XgerComplexSingle = {
+ "Xger", Precision::kComplexSingle, {"WGS1", "WGS2", "WPT"}, {
+ { // AMD GPUs
+ kDeviceTypeGPU, "AMD", {
+ { "AMD Radeon R9 M370X Compute Engine", { 64, 4, 1 } },
+ { "ATI Radeon HD 6750M", { 16, 16, 1 } },
+ { "Ellesmere", { 16, 8, 2 } },
+ { "Fiji", { 128, 2, 1 } },
+ { "Hawaii", { 64, 1, 2 } },
+ { "Oland", { 4, 8, 1 } },
+ { "Pitcairn", { 128, 2, 1 } },
+ { "Tahiti", { 64, 2, 1 } },
+ { "Tonga", { 64, 1, 1 } },
+ { "Turks", { 128, 2, 1 } },
+ { "default", { 128, 2, 1 } },
+ }
+ },
+ { // ARM GPUs
+ kDeviceTypeGPU, "ARM", {
+ { "Mali-T628", { 128, 1, 1 } },
+ { "default", { 128, 1, 1 } },
+ }
+ },
+ { // Intel CPUs
+ kDeviceTypeCPU, "Intel", {
+ { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 128, 2, 4 } },
+ { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 256, 1, 4 } },
+ { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 256, 8, 4 } },
+ { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 256, 2, 4 } },
+ { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 512, 4, 2 } },
+ { "default", { 256, 2, 4 } },
+ }
+ },
+ { // Intel GPUs
+ kDeviceTypeGPU, "Intel", {
+ { "Intel(R) HD Graphics 530", { 32, 1, 2 } },
+ { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 128, 2, 1 } },
+ { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 512, 1, 1 } },
+ { "Intel(R) HD Graphics IvyBridge M GT2", { 256, 1, 2 } },
+ { "Intel(R) HD Graphics Skylake ULT GT2", { 16, 1, 1 } },
+ { "Iris Pro", { 16, 2, 4 } },
+ { "default", { 128, 2, 2 } },
+ }
+ },
+ { // NVIDIA GPUs
+ kDeviceTypeGPU, "NVIDIA", {
+ { "GRID K520", { 64, 4, 2 } },
+ { "GeForce GTX 1070", { 16, 64, 2 } },
+ { "GeForce GTX 1080", { 32, 2, 1 } },
+ { "GeForce GTX 480", { 128, 2, 2 } },
+ { "GeForce GTX 670", { 16, 32, 2 } },
+ { "GeForce GTX 680", { 32, 4, 2 } },
+ { "GeForce GTX 750", { 32, 16, 4 } },
+ { "GeForce GTX 750 Ti", { 32, 8, 2 } },
+ { "GeForce GTX TITAN", { 16, 16, 2 } },
+ { "GeForce GTX TITAN Black", { 16, 16, 2 } },
+ { "TITAN X (Pascal)", { 32, 2, 1 } },
+ { "default", { 128, 2, 2 } },
+ }
+ },
+ { // QUALCOMM GPUs
+ kDeviceTypeGPU, "QUALCOMM", {
+ { "QUALCOMM Adreno(TM)", { 64, 1, 4 } },
+ { "default", { 64, 1, 4 } },
+ }
+ },
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { 64, 2, 2 } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/database/kernels/xger/xger_64.hpp b/src/database/kernels/xger/xger_64.hpp
new file mode 100644
index 00000000..304fcaf4
--- /dev/null
+++ b/src/database/kernels/xger/xger_64.hpp
@@ -0,0 +1,69 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Xger64' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry XgerDouble = {
+ "Xger", Precision::kDouble, {"WGS1", "WGS2", "WPT"}, {
+ { // AMD GPUs
+ kDeviceTypeGPU, "AMD", {
+ { "AMD Radeon R9 M370X Compute Engine", { 32, 4, 1 } },
+ { "Ellesmere", { 64, 1, 4 } },
+ { "Fiji", { 256, 1, 2 } },
+ { "Hawaii", { 32, 4, 2 } },
+ { "Oland", { 128, 1, 2 } },
+ { "Pitcairn", { 64, 1, 1 } },
+ { "Tahiti", { 64, 2, 1 } },
+ { "Tonga", { 8, 16, 2 } },
+ { "default", { 128, 2, 1 } },
+ }
+ },
+ { // ARM GPUs
+ kDeviceTypeGPU, "ARM", {
+ { "Mali-T628", { 64, 4, 1 } },
+ { "default", { 64, 4, 1 } },
+ }
+ },
+ { // Intel CPUs
+ kDeviceTypeCPU, "Intel", {
+ { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 256, 1, 4 } },
+ { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 512, 16, 1 } },
+ { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 256, 1, 4 } },
+ { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 256, 4, 4 } },
+ { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 512, 8, 2 } },
+ { "default", { 256, 1, 4 } },
+ }
+ },
+ { // NVIDIA GPUs
+ kDeviceTypeGPU, "NVIDIA", {
+ { "GRID K520", { 128, 8, 2 } },
+ { "GeForce GTX 1070", { 32, 8, 1 } },
+ { "GeForce GTX 1080", { 32, 2, 1 } },
+ { "GeForce GTX 480", { 32, 4, 2 } },
+ { "GeForce GTX 670", { 32, 32, 2 } },
+ { "GeForce GTX 680", { 128, 4, 2 } },
+ { "GeForce GTX 750", { 256, 2, 2 } },
+ { "GeForce GTX 750 Ti", { 32, 16, 1 } },
+ { "GeForce GTX TITAN", { 16, 8, 2 } },
+ { "GeForce GTX TITAN Black", { 32, 4, 2 } },
+ { "TITAN X (Pascal)", { 32, 2, 1 } },
+ { "default", { 128, 1, 2 } },
+ }
+ },
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { 128, 1, 2 } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/database/kernels/xger/xger_6464.hpp b/src/database/kernels/xger/xger_6464.hpp
new file mode 100644
index 00000000..dd7e6572
--- /dev/null
+++ b/src/database/kernels/xger/xger_6464.hpp
@@ -0,0 +1,69 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Xger6464' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry XgerComplexDouble = {
+ "Xger", Precision::kComplexDouble, {"WGS1", "WGS2", "WPT"}, {
+ { // AMD GPUs
+ kDeviceTypeGPU, "AMD", {
+ { "AMD Radeon R9 M370X Compute Engine", { 64, 1, 1 } },
+ { "Ellesmere", { 8, 16, 1 } },
+ { "Fiji", { 64, 4, 2 } },
+ { "Hawaii", { 128, 1, 1 } },
+ { "Oland", { 16, 16, 2 } },
+ { "Pitcairn", { 64, 4, 1 } },
+ { "Tahiti", { 32, 4, 1 } },
+ { "Tonga", { 16, 4, 1 } },
+ { "default", { 32, 4, 1 } },
+ }
+ },
+ { // ARM GPUs
+ kDeviceTypeGPU, "ARM", {
+ { "Mali-T628", { 64, 2, 4 } },
+ { "default", { 64, 2, 4 } },
+ }
+ },
+ { // Intel CPUs
+ kDeviceTypeCPU, "Intel", {
+ { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 128, 4, 4 } },
+ { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 512, 4, 2 } },
+ { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 256, 8, 4 } },
+ { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 512, 2, 2 } },
+ { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 256, 1, 2 } },
+ { "default", { 256, 2, 2 } },
+ }
+ },
+ { // NVIDIA GPUs
+ kDeviceTypeGPU, "NVIDIA", {
+ { "GRID K520", { 16, 8, 2 } },
+ { "GeForce GTX 1070", { 8, 128, 1 } },
+ { "GeForce GTX 1080", { 8, 4, 1 } },
+ { "GeForce GTX 480", { 64, 2, 2 } },
+ { "GeForce GTX 670", { 8, 16, 2 } },
+ { "GeForce GTX 680", { 8, 16, 1 } },
+ { "GeForce GTX 750", { 8, 32, 4 } },
+ { "GeForce GTX 750 Ti", { 32, 8, 2 } },
+ { "GeForce GTX TITAN", { 32, 4, 2 } },
+ { "GeForce GTX TITAN Black", { 16, 16, 2 } },
+ { "TITAN X (Pascal)", { 4, 8, 1 } },
+ { "default", { 16, 8, 2 } },
+ }
+ },
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { 64, 2, 2 } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/database/kernels/xtrsv.hpp b/src/database/kernels/xtrsv.hpp
index 6633b8b7..7c22aa57 100644
--- a/src/database/kernels/xtrsv.hpp
+++ b/src/database/kernels/xtrsv.hpp
@@ -15,7 +15,7 @@ namespace clblast {
namespace database {
// =================================================================================================
-const Database::DatabaseEntry XtrsvHalf = {
+const DatabaseEntry XtrsvHalf = {
"Xtrsv", Precision::kHalf, {"TRSV_BLOCK_SIZE"}, {
{ // Default
kDeviceTypeAll, "default", {
@@ -27,7 +27,7 @@ const Database::DatabaseEntry XtrsvHalf = {
// =================================================================================================
-const Database::DatabaseEntry XtrsvSingle = {
+const DatabaseEntry XtrsvSingle = {
"Xtrsv", Precision::kSingle, {"TRSV_BLOCK_SIZE"}, {
{ // Default
kDeviceTypeAll, "default", {
@@ -39,7 +39,7 @@ const Database::DatabaseEntry XtrsvSingle = {
// =================================================================================================
-const Database::DatabaseEntry XtrsvComplexSingle = {
+const DatabaseEntry XtrsvComplexSingle = {
"Xtrsv", Precision::kComplexSingle, {"TRSV_BLOCK_SIZE"}, {
{ // Default
kDeviceTypeAll, "default", {
@@ -51,7 +51,7 @@ const Database::DatabaseEntry XtrsvComplexSingle = {
// =================================================================================================
-const Database::DatabaseEntry XtrsvDouble = {
+const DatabaseEntry XtrsvDouble = {
"Xtrsv", Precision::kDouble, {"TRSV_BLOCK_SIZE"}, {
{ // Default
kDeviceTypeAll, "default", {
@@ -63,7 +63,7 @@ const Database::DatabaseEntry XtrsvDouble = {
// =================================================================================================
-const Database::DatabaseEntry XtrsvComplexDouble = {
+const DatabaseEntry XtrsvComplexDouble = {
"Xtrsv", Precision::kComplexDouble, {"TRSV_BLOCK_SIZE"}, {
{ // Default
kDeviceTypeAll, "default", {