From 048fe90e57a00ca33681feb716474e2046ec7f5b Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sat, 7 Apr 2018 17:33:57 +0200 Subject: Added tuning results for Intel HD Graphics 620 --- src/database/kernels/copy/copy_16.hpp | 1 + src/database/kernels/copy/copy_32.hpp | 3 ++- src/database/kernels/copy/copy_3232.hpp | 3 ++- src/database/kernels/copy/copy_64.hpp | 8 ++++++++ src/database/kernels/copy/copy_6464.hpp | 8 ++++++++ src/database/kernels/pad/pad_16.hpp | 1 + src/database/kernels/pad/pad_32.hpp | 1 + src/database/kernels/pad/pad_3232.hpp | 1 + src/database/kernels/pad/pad_64.hpp | 8 ++++++++ src/database/kernels/pad/pad_6464.hpp | 8 ++++++++ src/database/kernels/padtranspose/padtranspose_16.hpp | 1 + src/database/kernels/padtranspose/padtranspose_32.hpp | 1 + src/database/kernels/padtranspose/padtranspose_3232.hpp | 5 +++-- src/database/kernels/padtranspose/padtranspose_64.hpp | 8 ++++++++ src/database/kernels/padtranspose/padtranspose_6464.hpp | 8 ++++++++ src/database/kernels/transpose/transpose_16.hpp | 3 ++- src/database/kernels/transpose/transpose_32.hpp | 1 + src/database/kernels/transpose/transpose_3232.hpp | 1 + src/database/kernels/transpose/transpose_64.hpp | 8 ++++++++ src/database/kernels/transpose/transpose_6464.hpp | 8 ++++++++ src/database/kernels/xaxpy/xaxpy_16.hpp | 1 + src/database/kernels/xaxpy/xaxpy_32.hpp | 3 ++- src/database/kernels/xaxpy/xaxpy_3232.hpp | 1 + src/database/kernels/xaxpy/xaxpy_64.hpp | 8 ++++++++ src/database/kernels/xaxpy/xaxpy_6464.hpp | 8 ++++++++ src/database/kernels/xdot/xdot_16.hpp | 1 + src/database/kernels/xdot/xdot_32.hpp | 1 + src/database/kernels/xdot/xdot_3232.hpp | 1 + src/database/kernels/xdot/xdot_64.hpp | 8 ++++++++ src/database/kernels/xdot/xdot_6464.hpp | 8 ++++++++ src/database/kernels/xgemm/xgemm_16.hpp | 7 ++++--- src/database/kernels/xgemm/xgemm_32.hpp | 1 + src/database/kernels/xgemm/xgemm_3232.hpp | 1 + src/database/kernels/xgemm/xgemm_64.hpp | 8 ++++++++ src/database/kernels/xgemm/xgemm_6464.hpp | 8 ++++++++ src/database/kernels/xgemm_direct/xgemm_direct_16.hpp | 1 + src/database/kernels/xgemm_direct/xgemm_direct_32.hpp | 1 + src/database/kernels/xgemm_direct/xgemm_direct_3232.hpp | 1 + src/database/kernels/xgemm_direct/xgemm_direct_64.hpp | 10 +++++++++- src/database/kernels/xgemm_direct/xgemm_direct_6464.hpp | 8 ++++++++ src/database/kernels/xger/xger_16.hpp | 1 + src/database/kernels/xger/xger_32.hpp | 3 ++- src/database/kernels/xger/xger_3232.hpp | 3 ++- src/database/kernels/xger/xger_64.hpp | 8 ++++++++ src/database/kernels/xger/xger_6464.hpp | 8 ++++++++ 45 files changed, 183 insertions(+), 12 deletions(-) (limited to 'src/database') diff --git a/src/database/kernels/copy/copy_16.hpp b/src/database/kernels/copy/copy_16.hpp index 0249ee77..5fe4abf1 100644 --- a/src/database/kernels/copy/copy_16.hpp +++ b/src/database/kernels/copy/copy_16.hpp @@ -36,6 +36,7 @@ const DatabaseEntry CopyHalf = { kDeviceTypeGPU, "Intel", { { "default", { { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 8, 16, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics 620 "}, Params{ 32, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 8, 32, 4, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 8, 32, 4, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, diff --git a/src/database/kernels/copy/copy_32.hpp b/src/database/kernels/copy/copy_32.hpp index 5a5393b4..f68d329c 100644 --- a/src/database/kernels/copy/copy_32.hpp +++ b/src/database/kernels/copy/copy_32.hpp @@ -86,6 +86,7 @@ const DatabaseEntry CopySingle = { { "default", { { Name{"Intel(R) HD Graphics 530 "}, Params{ 8, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 32, 16, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics 620 "}, Params{ 8, 8, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile "}, Params{ 32, 16, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics IvyBridge M GT2 "}, Params{ 16, 16, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 16, 8, 4, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -158,7 +159,7 @@ const DatabaseEntry CopySingle = { { // Default kDeviceTypeAll, "default", { { "default", { - { kDeviceNameDefault , Params{ 16, 16, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/copy/copy_3232.hpp b/src/database/kernels/copy/copy_3232.hpp index 9ff50acf..62edb1fe 100644 --- a/src/database/kernels/copy/copy_3232.hpp +++ b/src/database/kernels/copy/copy_3232.hpp @@ -85,11 +85,12 @@ const DatabaseEntry CopyComplexSingle = { { "default", { { Name{"Intel(R) HD Graphics 530 "}, Params{ 16, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 16, 16, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics 620 "}, Params{ 16, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile "}, Params{ 8, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 8, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Iris "}, Params{ 16, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Iris Pro "}, Params{ 32, 16, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 8, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/copy/copy_64.hpp b/src/database/kernels/copy/copy_64.hpp index 5ddbf118..53a5dca9 100644 --- a/src/database/kernels/copy/copy_64.hpp +++ b/src/database/kernels/copy/copy_64.hpp @@ -72,6 +72,14 @@ const DatabaseEntry CopyDouble = { } }, } }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics 620 "}, Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, { // Intel accelerators kDeviceTypeAccelerator, "Intel", { { "default", { diff --git a/src/database/kernels/copy/copy_6464.hpp b/src/database/kernels/copy/copy_6464.hpp index b7abd88b..806eb659 100644 --- a/src/database/kernels/copy/copy_6464.hpp +++ b/src/database/kernels/copy/copy_6464.hpp @@ -72,6 +72,14 @@ const DatabaseEntry CopyComplexDouble = { } }, } }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics 620 "}, Params{ 8, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, { // Intel accelerators kDeviceTypeAccelerator, "Intel", { { "default", { diff --git a/src/database/kernels/pad/pad_16.hpp b/src/database/kernels/pad/pad_16.hpp index c481e2fa..c5a5c91e 100644 --- a/src/database/kernels/pad/pad_16.hpp +++ b/src/database/kernels/pad/pad_16.hpp @@ -36,6 +36,7 @@ const DatabaseEntry PadHalf = { kDeviceTypeGPU, "Intel", { { "default", { { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 8, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics 620 "}, Params{ 16, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 8, 32, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 8, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, diff --git a/src/database/kernels/pad/pad_32.hpp b/src/database/kernels/pad/pad_32.hpp index 4275c67d..2b7a893f 100644 --- a/src/database/kernels/pad/pad_32.hpp +++ b/src/database/kernels/pad/pad_32.hpp @@ -86,6 +86,7 @@ const DatabaseEntry PadSingle = { { "default", { { Name{"Intel(R) HD Graphics 530 "}, Params{ 32, 8, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 32, 8, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics 620 "}, Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile "}, Params{ 16, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics IvyBridge M GT2 "}, Params{ 32, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 32, 8, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/pad/pad_3232.hpp b/src/database/kernels/pad/pad_3232.hpp index d06bc17e..7f7450e1 100644 --- a/src/database/kernels/pad/pad_3232.hpp +++ b/src/database/kernels/pad/pad_3232.hpp @@ -85,6 +85,7 @@ const DatabaseEntry PadComplexSingle = { { "default", { { Name{"Intel(R) HD Graphics 530 "}, Params{ 8, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 8, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics 620 "}, Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile "}, Params{ 8, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 32, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Iris "}, Params{ 32, 16, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/pad/pad_64.hpp b/src/database/kernels/pad/pad_64.hpp index cdb62b81..cf2e0d2e 100644 --- a/src/database/kernels/pad/pad_64.hpp +++ b/src/database/kernels/pad/pad_64.hpp @@ -72,6 +72,14 @@ const DatabaseEntry PadDouble = { } }, } }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics 620 "}, Params{ 16, 8, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 8, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, { // Intel accelerators kDeviceTypeAccelerator, "Intel", { { "default", { diff --git a/src/database/kernels/pad/pad_6464.hpp b/src/database/kernels/pad/pad_6464.hpp index 4a4bd7eb..32f520a8 100644 --- a/src/database/kernels/pad/pad_6464.hpp +++ b/src/database/kernels/pad/pad_6464.hpp @@ -72,6 +72,14 @@ const DatabaseEntry PadComplexDouble = { } }, } }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics 620 "}, Params{ 8, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, { // Intel accelerators kDeviceTypeAccelerator, "Intel", { { "default", { diff --git a/src/database/kernels/padtranspose/padtranspose_16.hpp b/src/database/kernels/padtranspose/padtranspose_16.hpp index 51afde55..70432bbe 100644 --- a/src/database/kernels/padtranspose/padtranspose_16.hpp +++ b/src/database/kernels/padtranspose/padtranspose_16.hpp @@ -36,6 +36,7 @@ const DatabaseEntry PadtransposeHalf = { kDeviceTypeGPU, "Intel", { { "default", { { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 0, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics 620 "}, Params{ 1, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 1, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 0, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, diff --git a/src/database/kernels/padtranspose/padtranspose_32.hpp b/src/database/kernels/padtranspose/padtranspose_32.hpp index 1731dcdf..9f989984 100644 --- a/src/database/kernels/padtranspose/padtranspose_32.hpp +++ b/src/database/kernels/padtranspose/padtranspose_32.hpp @@ -86,6 +86,7 @@ const DatabaseEntry PadtransposeSingle = { { "default", { { Name{"Intel(R) HD Graphics 530 "}, Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 0, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics 620 "}, Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile "}, Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics IvyBridge M GT2 "}, Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/padtranspose/padtranspose_3232.hpp b/src/database/kernels/padtranspose/padtranspose_3232.hpp index 4cbf0bdf..e2e3bf42 100644 --- a/src/database/kernels/padtranspose/padtranspose_3232.hpp +++ b/src/database/kernels/padtranspose/padtranspose_3232.hpp @@ -85,11 +85,12 @@ const DatabaseEntry PadtransposeComplexSingle = { { "default", { { Name{"Intel(R) HD Graphics 530 "}, Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 0, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics 620 "}, Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile "}, Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 0, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Iris "}, Params{ 0, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Iris Pro "}, Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 0, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, @@ -155,7 +156,7 @@ const DatabaseEntry PadtransposeComplexSingle = { { // Default kDeviceTypeAll, "default", { { "default", { - { kDeviceNameDefault , Params{ 1, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 0, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/padtranspose/padtranspose_64.hpp b/src/database/kernels/padtranspose/padtranspose_64.hpp index dde3cba6..2f6e5100 100644 --- a/src/database/kernels/padtranspose/padtranspose_64.hpp +++ b/src/database/kernels/padtranspose/padtranspose_64.hpp @@ -72,6 +72,14 @@ const DatabaseEntry PadtransposeDouble = { } }, } }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics 620 "}, Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, { // Intel accelerators kDeviceTypeAccelerator, "Intel", { { "default", { diff --git a/src/database/kernels/padtranspose/padtranspose_6464.hpp b/src/database/kernels/padtranspose/padtranspose_6464.hpp index c4ca9f2d..3a9067d9 100644 --- a/src/database/kernels/padtranspose/padtranspose_6464.hpp +++ b/src/database/kernels/padtranspose/padtranspose_6464.hpp @@ -72,6 +72,14 @@ const DatabaseEntry PadtransposeComplexDouble = { } }, } }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics 620 "}, Params{ 1, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, { // Intel accelerators kDeviceTypeAccelerator, "Intel", { { "default", { diff --git a/src/database/kernels/transpose/transpose_16.hpp b/src/database/kernels/transpose/transpose_16.hpp index ded75554..ae33859d 100644 --- a/src/database/kernels/transpose/transpose_16.hpp +++ b/src/database/kernels/transpose/transpose_16.hpp @@ -36,8 +36,9 @@ const DatabaseEntry TransposeHalf = { kDeviceTypeGPU, "Intel", { { "default", { { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 8, 1, 1, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics 620 "}, Params{ 8, 1, 1, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 16, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 8, 1, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/transpose/transpose_32.hpp b/src/database/kernels/transpose/transpose_32.hpp index 69da994d..c98fcf9b 100644 --- a/src/database/kernels/transpose/transpose_32.hpp +++ b/src/database/kernels/transpose/transpose_32.hpp @@ -86,6 +86,7 @@ const DatabaseEntry TransposeSingle = { { "default", { { Name{"Intel(R) HD Graphics 530 "}, Params{ 16, 0, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 16, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics 620 "}, Params{ 8, 0, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile "}, Params{ 16, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics IvyBridge M GT2 "}, Params{ 8, 1, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 16, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/transpose/transpose_3232.hpp b/src/database/kernels/transpose/transpose_3232.hpp index eb9e75cf..73089fe9 100644 --- a/src/database/kernels/transpose/transpose_3232.hpp +++ b/src/database/kernels/transpose/transpose_3232.hpp @@ -85,6 +85,7 @@ const DatabaseEntry TransposeComplexSingle = { { "default", { { Name{"Intel(R) HD Graphics 530 "}, Params{ 16, 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 8, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics 620 "}, Params{ 16, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile "}, Params{ 8, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 16, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Iris "}, Params{ 8, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/transpose/transpose_64.hpp b/src/database/kernels/transpose/transpose_64.hpp index 248e63d7..f216b2d4 100644 --- a/src/database/kernels/transpose/transpose_64.hpp +++ b/src/database/kernels/transpose/transpose_64.hpp @@ -72,6 +72,14 @@ const DatabaseEntry TransposeDouble = { } }, } }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics 620 "}, Params{ 16, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, { // Intel accelerators kDeviceTypeAccelerator, "Intel", { { "default", { diff --git a/src/database/kernels/transpose/transpose_6464.hpp b/src/database/kernels/transpose/transpose_6464.hpp index 541f9c8a..d810bcbb 100644 --- a/src/database/kernels/transpose/transpose_6464.hpp +++ b/src/database/kernels/transpose/transpose_6464.hpp @@ -72,6 +72,14 @@ const DatabaseEntry TransposeComplexDouble = { } }, } }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics 620 "}, Params{ 8, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { { "SM2.0", { diff --git a/src/database/kernels/xaxpy/xaxpy_16.hpp b/src/database/kernels/xaxpy/xaxpy_16.hpp index fd678f2e..080a70f4 100644 --- a/src/database/kernels/xaxpy/xaxpy_16.hpp +++ b/src/database/kernels/xaxpy/xaxpy_16.hpp @@ -36,6 +36,7 @@ const DatabaseEntry XaxpyHalf = { kDeviceTypeGPU, "Intel", { { "default", { { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics 620 "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 8, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 8, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, diff --git a/src/database/kernels/xaxpy/xaxpy_32.hpp b/src/database/kernels/xaxpy/xaxpy_32.hpp index c175f9e1..20801de4 100644 --- a/src/database/kernels/xaxpy/xaxpy_32.hpp +++ b/src/database/kernels/xaxpy/xaxpy_32.hpp @@ -86,12 +86,13 @@ const DatabaseEntry XaxpySingle = { { "default", { { Name{"Intel(R) HD Graphics 530 "}, Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics 620 "}, Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics IvyBridge M GT2 "}, Params{ 8, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 8, 512, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Iris "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Iris Pro "}, Params{ 1, 128, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 4, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 128, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/xaxpy/xaxpy_3232.hpp b/src/database/kernels/xaxpy/xaxpy_3232.hpp index ad604e02..29ac046f 100644 --- a/src/database/kernels/xaxpy/xaxpy_3232.hpp +++ b/src/database/kernels/xaxpy/xaxpy_3232.hpp @@ -85,6 +85,7 @@ const DatabaseEntry XaxpyComplexSingle = { { "default", { { Name{"Intel(R) HD Graphics 530 "}, Params{ 4, 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics 620 "}, Params{ 4, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 4, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Iris "}, Params{ 2, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xaxpy/xaxpy_64.hpp b/src/database/kernels/xaxpy/xaxpy_64.hpp index 81cf2f26..69547315 100644 --- a/src/database/kernels/xaxpy/xaxpy_64.hpp +++ b/src/database/kernels/xaxpy/xaxpy_64.hpp @@ -72,6 +72,14 @@ const DatabaseEntry XaxpyDouble = { } }, } }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics 620 "}, Params{ 8, 128, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 128, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, { // Intel accelerators kDeviceTypeAccelerator, "Intel", { { "default", { diff --git a/src/database/kernels/xaxpy/xaxpy_6464.hpp b/src/database/kernels/xaxpy/xaxpy_6464.hpp index b0c76442..186a34eb 100644 --- a/src/database/kernels/xaxpy/xaxpy_6464.hpp +++ b/src/database/kernels/xaxpy/xaxpy_6464.hpp @@ -72,6 +72,14 @@ const DatabaseEntry XaxpyComplexDouble = { } }, } }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics 620 "}, Params{ 4, 128, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 4, 128, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, { // Intel accelerators kDeviceTypeAccelerator, "Intel", { { "default", { diff --git a/src/database/kernels/xdot/xdot_16.hpp b/src/database/kernels/xdot/xdot_16.hpp index bdb7558d..ec8321e6 100644 --- a/src/database/kernels/xdot/xdot_16.hpp +++ b/src/database/kernels/xdot/xdot_16.hpp @@ -36,6 +36,7 @@ const DatabaseEntry XdotHalf = { kDeviceTypeGPU, "Intel", { { "default", { { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics 620 "}, Params{ 64, 256, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, diff --git a/src/database/kernels/xdot/xdot_32.hpp b/src/database/kernels/xdot/xdot_32.hpp index 8c17891a..ffb99512 100644 --- a/src/database/kernels/xdot/xdot_32.hpp +++ b/src/database/kernels/xdot/xdot_32.hpp @@ -80,6 +80,7 @@ const DatabaseEntry XdotSingle = { { "default", { { Name{"Intel(R) HD Graphics 530 "}, Params{ 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics 620 "}, Params{ 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile "}, Params{ 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics IvyBridge M GT2 "}, Params{ 256, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xdot/xdot_3232.hpp b/src/database/kernels/xdot/xdot_3232.hpp index cbf8a868..265fd33c 100644 --- a/src/database/kernels/xdot/xdot_3232.hpp +++ b/src/database/kernels/xdot/xdot_3232.hpp @@ -79,6 +79,7 @@ const DatabaseEntry XdotComplexSingle = { { "default", { { Name{"Intel(R) HD Graphics 530 "}, Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics 620 "}, Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile "}, Params{ 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 32, 256, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Iris Pro "}, Params{ 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xdot/xdot_64.hpp b/src/database/kernels/xdot/xdot_64.hpp index a6ee267a..f3cb2e1e 100644 --- a/src/database/kernels/xdot/xdot_64.hpp +++ b/src/database/kernels/xdot/xdot_64.hpp @@ -66,6 +66,14 @@ const DatabaseEntry XdotDouble = { } }, } }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics 620 "}, Params{ 256, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { { "SM2.0", { diff --git a/src/database/kernels/xdot/xdot_6464.hpp b/src/database/kernels/xdot/xdot_6464.hpp index fa8d7082..83acd1c9 100644 --- a/src/database/kernels/xdot/xdot_6464.hpp +++ b/src/database/kernels/xdot/xdot_6464.hpp @@ -66,6 +66,14 @@ const DatabaseEntry XdotComplexDouble = { } }, } }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics 620 "}, Params{ 64, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { { "SM2.0", { diff --git a/src/database/kernels/xgemm/xgemm_16.hpp b/src/database/kernels/xgemm/xgemm_16.hpp index e375d317..62a5f1ff 100644 --- a/src/database/kernels/xgemm/xgemm_16.hpp +++ b/src/database/kernels/xgemm/xgemm_16.hpp @@ -35,15 +35,16 @@ const DatabaseEntry XgemmHalf = { { // Intel GPUs kDeviceTypeGPU, "Intel", { { "default", { - { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 32, 2, 8, 8, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } }, - { kDeviceNameDefault , Params{ 32, 2, 8, 8, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } }, + { Name{"Intel(R) HD Graphics 620 "}, Params{ 32, 2, 8, 8, 64, 8, 8, 64, 0, 0, 0, 0, 1, 1 } }, + { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 32, 2, 8, 16, 128, 8, 8, 32, 0, 1, 0, 1, 8, 4 } }, + { kDeviceNameDefault , Params{ 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 4, 2 } }, } }, } }, { // Default kDeviceTypeAll, "default", { { "default", { - { kDeviceNameDefault , Params{ 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 4, 4 } }, + { kDeviceNameDefault , Params{ 32, 2, 16, 16, 64, 8, 8, 32, 1, 1, 0, 0, 4, 4 } }, } }, } }, diff --git a/src/database/kernels/xgemm/xgemm_32.hpp b/src/database/kernels/xgemm/xgemm_32.hpp index e105d9ac..26302a9f 100644 --- a/src/database/kernels/xgemm/xgemm_32.hpp +++ b/src/database/kernels/xgemm/xgemm_32.hpp @@ -86,6 +86,7 @@ const DatabaseEntry XgemmSingle = { { "default", { { Name{"Intel(R) HD Graphics 530 "}, Params{ 32, 2, 8, 8, 128, 32, 16, 64, 0, 0, 1, 0, 4, 2 } }, { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 32, 8, 8, 8, 64, 32, 16, 64, 1, 1, 1, 1, 4, 2 } }, + { Name{"Intel(R) HD Graphics 620 "}, Params{ 32, 2, 32, 8, 64, 16, 16, 128, 0, 0, 0, 1, 1, 2 } }, { Name{"Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile "}, Params{ 16, 2, 16, 8, 32, 8, 16, 128, 1, 1, 1, 1, 2, 4 } }, { Name{"Intel(R) HD Graphics IvyBridge M GT2 "}, Params{ 32, 2, 16, 16, 64, 8, 16, 128, 1, 1, 0, 1, 1, 4 } }, { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 4, 4 } }, diff --git a/src/database/kernels/xgemm/xgemm_3232.hpp b/src/database/kernels/xgemm/xgemm_3232.hpp index c6406d21..b9349030 100644 --- a/src/database/kernels/xgemm/xgemm_3232.hpp +++ b/src/database/kernels/xgemm/xgemm_3232.hpp @@ -85,6 +85,7 @@ const DatabaseEntry XgemmComplexSingle = { { "default", { { Name{"Intel(R) HD Graphics 530 "}, Params{ 16, 8, 8, 16, 64, 32, 8, 32, 0, 0, 0, 0, 2, 1 } }, { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 16, 8, 8, 8, 32, 16, 16, 64, 1, 0, 0, 0, 4, 4 } }, + { Name{"Intel(R) HD Graphics 620 "}, Params{ 16, 2, 16, 8, 32, 8, 8, 32, 0, 0, 1, 0, 1, 1 } }, { Name{"Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile "}, Params{ 32, 8, 16, 16, 64, 16, 16, 64, 1, 1, 1, 1, 2, 1 } }, { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 32, 2, 16, 16, 64, 16, 16, 64, 0, 0, 0, 0, 4, 2 } }, { Name{"Iris "}, Params{ 32, 8, 32, 16, 64, 8, 16, 64, 1, 0, 1, 0, 1, 1 } }, diff --git a/src/database/kernels/xgemm/xgemm_64.hpp b/src/database/kernels/xgemm/xgemm_64.hpp index b34a616f..a59e5583 100644 --- a/src/database/kernels/xgemm/xgemm_64.hpp +++ b/src/database/kernels/xgemm/xgemm_64.hpp @@ -72,6 +72,14 @@ const DatabaseEntry XgemmDouble = { } }, } }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics 620 "}, Params{ 16, 2, 16, 8, 32, 8, 8, 32, 0, 0, 1, 0, 1, 1 } }, + { kDeviceNameDefault , Params{ 16, 2, 16, 8, 32, 8, 8, 32, 0, 0, 1, 0, 1, 1 } }, + } }, + } + }, { // Intel accelerators kDeviceTypeAccelerator, "Intel", { { "default", { diff --git a/src/database/kernels/xgemm/xgemm_6464.hpp b/src/database/kernels/xgemm/xgemm_6464.hpp index 368833ee..37fbc659 100644 --- a/src/database/kernels/xgemm/xgemm_6464.hpp +++ b/src/database/kernels/xgemm/xgemm_6464.hpp @@ -72,6 +72,14 @@ const DatabaseEntry XgemmComplexDouble = { } }, } }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics 620 "}, Params{ 32, 2, 8, 8, 16, 8, 8, 32, 0, 0, 0, 0, 2, 1 } }, + { kDeviceNameDefault , Params{ 32, 2, 8, 8, 16, 8, 8, 32, 0, 0, 0, 0, 2, 1 } }, + } }, + } + }, { // Intel accelerators kDeviceTypeAccelerator, "Intel", { { "default", { diff --git a/src/database/kernels/xgemm_direct/xgemm_direct_16.hpp b/src/database/kernels/xgemm_direct/xgemm_direct_16.hpp index 9bdc0537..ff40e6a4 100644 --- a/src/database/kernels/xgemm_direct/xgemm_direct_16.hpp +++ b/src/database/kernels/xgemm_direct/xgemm_direct_16.hpp @@ -35,6 +35,7 @@ const DatabaseEntry XgemmDirectHalf = { { // Intel GPUs kDeviceTypeGPU, "Intel", { { "default", { + { Name{"Intel(R) HD Graphics 620 "}, Params{ 2, 16, 16, 16, 16, 1, 1, 2, 2, 32, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 2, 8, 8, 8, 8, 1, 1, 1, 1, 8, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 1, 1, 8, 0, 0, 0, 0 } }, } }, diff --git a/src/database/kernels/xgemm_direct/xgemm_direct_32.hpp b/src/database/kernels/xgemm_direct/xgemm_direct_32.hpp index 8265677c..2b9a0817 100644 --- a/src/database/kernels/xgemm_direct/xgemm_direct_32.hpp +++ b/src/database/kernels/xgemm_direct/xgemm_direct_32.hpp @@ -66,6 +66,7 @@ const DatabaseEntry XgemmDirectSingle = { { // Intel GPUs kDeviceTypeGPU, "Intel", { { "default", { + { Name{"Intel(R) HD Graphics 620 "}, Params{ 2, 16, 16, 8, 8, 1, 1, 2, 1, 32, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics IvyBridge M GT2 "}, Params{ 8, 16, 8, 16, 16, 1, 0, 2, 2, 32, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 2, 8, 8, 8, 8, 1, 1, 1, 1, 8, 0, 0, 0, 0 } }, { Name{"Iris Pro "}, Params{ 2, 16, 16, 8, 8, 1, 1, 2, 4, 32, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xgemm_direct/xgemm_direct_3232.hpp b/src/database/kernels/xgemm_direct/xgemm_direct_3232.hpp index 0bc35785..ff83f3e5 100644 --- a/src/database/kernels/xgemm_direct/xgemm_direct_3232.hpp +++ b/src/database/kernels/xgemm_direct/xgemm_direct_3232.hpp @@ -61,6 +61,7 @@ const DatabaseEntry XgemmDirectComplexSingle = { { // Intel GPUs kDeviceTypeGPU, "Intel", { { "default", { + { Name{"Intel(R) HD Graphics 620 "}, Params{ 2, 16, 16, 8, 8, 1, 1, 1, 2, 32, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 2, 16, 16, 16, 16, 1, 1, 1, 1, 16, 0, 0, 0, 0 } }, { Name{"Iris Pro "}, Params{ 2, 16, 16, 8, 8, 1, 1, 2, 2, 32, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 1, 1, 8, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xgemm_direct/xgemm_direct_64.hpp b/src/database/kernels/xgemm_direct/xgemm_direct_64.hpp index 0ef61440..cceefa56 100644 --- a/src/database/kernels/xgemm_direct/xgemm_direct_64.hpp +++ b/src/database/kernels/xgemm_direct/xgemm_direct_64.hpp @@ -54,6 +54,14 @@ const DatabaseEntry XgemmDirectDouble = { } }, } }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics 620 "}, Params{ 2, 16, 16, 8, 8, 1, 1, 2, 4, 32, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 16, 16, 8, 8, 1, 1, 2, 4, 32, 0, 0, 0, 0 } }, + } }, + } + }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { { "SM2.0", { @@ -86,7 +94,7 @@ const DatabaseEntry XgemmDirectDouble = { { // Default kDeviceTypeAll, "default", { { "default", { - { kDeviceNameDefault , Params{ 2, 8, 8, 16, 16, 1, 1, 2, 2, 32, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 2, 2, 16, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/xgemm_direct/xgemm_direct_6464.hpp b/src/database/kernels/xgemm_direct/xgemm_direct_6464.hpp index c4667291..d124735f 100644 --- a/src/database/kernels/xgemm_direct/xgemm_direct_6464.hpp +++ b/src/database/kernels/xgemm_direct/xgemm_direct_6464.hpp @@ -54,6 +54,14 @@ const DatabaseEntry XgemmDirectComplexDouble = { } }, } }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics 620 "}, Params{ 2, 16, 16, 16, 16, 1, 1, 1, 1, 32, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 16, 16, 16, 16, 1, 1, 1, 1, 32, 0, 0, 0, 0 } }, + } }, + } + }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { { "SM2.0", { diff --git a/src/database/kernels/xger/xger_16.hpp b/src/database/kernels/xger/xger_16.hpp index 26f54f1a..394a4628 100644 --- a/src/database/kernels/xger/xger_16.hpp +++ b/src/database/kernels/xger/xger_16.hpp @@ -35,6 +35,7 @@ const DatabaseEntry XgerHalf = { kDeviceTypeGPU, "Intel", { { "default", { { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 256, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics 620 "}, Params{ 32, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 64, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 4, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, diff --git a/src/database/kernels/xger/xger_32.hpp b/src/database/kernels/xger/xger_32.hpp index e32222e0..a795cead 100644 --- a/src/database/kernels/xger/xger_32.hpp +++ b/src/database/kernels/xger/xger_32.hpp @@ -85,11 +85,12 @@ const DatabaseEntry XgerSingle = { { "default", { { Name{"Intel(R) HD Graphics 530 "}, Params{ 32, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 256, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics 620 "}, Params{ 32, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile "}, Params{ 128, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics IvyBridge M GT2 "}, Params{ 64, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 32, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Iris Pro "}, Params{ 64, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 64, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/xger/xger_3232.hpp b/src/database/kernels/xger/xger_3232.hpp index 007075b2..914cce22 100644 --- a/src/database/kernels/xger/xger_3232.hpp +++ b/src/database/kernels/xger/xger_3232.hpp @@ -84,10 +84,11 @@ const DatabaseEntry XgerComplexSingle = { { "default", { { Name{"Intel(R) HD Graphics 530 "}, Params{ 32, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 128, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics 620 "}, Params{ 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile "}, Params{ 512, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Iris Pro "}, Params{ 16, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 128, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/xger/xger_64.hpp b/src/database/kernels/xger/xger_64.hpp index 565b20c1..5fc0d389 100644 --- a/src/database/kernels/xger/xger_64.hpp +++ b/src/database/kernels/xger/xger_64.hpp @@ -71,6 +71,14 @@ const DatabaseEntry XgerDouble = { } }, } }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics 620 "}, Params{ 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { { "SM2.0", { diff --git a/src/database/kernels/xger/xger_6464.hpp b/src/database/kernels/xger/xger_6464.hpp index 4bcd3c10..50216f4e 100644 --- a/src/database/kernels/xger/xger_6464.hpp +++ b/src/database/kernels/xger/xger_6464.hpp @@ -71,6 +71,14 @@ const DatabaseEntry XgerComplexDouble = { } }, } }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics 620 "}, Params{ 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { { "SM2.0", { -- cgit v1.2.3 From 9596e46d01655fe0ac9efd97f711a3350313c8ab Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sat, 7 Apr 2018 17:44:32 +0200 Subject: Added tuning results for NVIDIA GeForce 920MX --- CHANGELOG | 1 + doc/tuning.md | 2 ++ src/database/kernels/copy/copy_32.hpp | 3 ++- src/database/kernels/copy/copy_3232.hpp | 1 + src/database/kernels/copy/copy_64.hpp | 1 + src/database/kernels/copy/copy_6464.hpp | 1 + src/database/kernels/invert/invert_32.hpp | 11 +++++++++++ src/database/kernels/invert/invert_3232.hpp | 13 ++++++++++++- src/database/kernels/invert/invert_64.hpp | 13 ++++++++++++- src/database/kernels/invert/invert_6464.hpp | 11 +++++++++++ src/database/kernels/pad/pad_32.hpp | 3 ++- src/database/kernels/pad/pad_3232.hpp | 1 + src/database/kernels/pad/pad_64.hpp | 1 + src/database/kernels/pad/pad_6464.hpp | 1 + src/database/kernels/padtranspose/padtranspose_32.hpp | 1 + src/database/kernels/padtranspose/padtranspose_3232.hpp | 1 + src/database/kernels/padtranspose/padtranspose_64.hpp | 1 + src/database/kernels/padtranspose/padtranspose_6464.hpp | 1 + src/database/kernels/transpose/transpose_32.hpp | 1 + src/database/kernels/transpose/transpose_3232.hpp | 1 + src/database/kernels/transpose/transpose_64.hpp | 1 + src/database/kernels/transpose/transpose_6464.hpp | 1 + src/database/kernels/xaxpy/xaxpy_32.hpp | 1 + src/database/kernels/xaxpy/xaxpy_3232.hpp | 3 ++- src/database/kernels/xaxpy/xaxpy_64.hpp | 3 ++- src/database/kernels/xaxpy/xaxpy_6464.hpp | 1 + src/database/kernels/xdot/xdot_32.hpp | 3 ++- src/database/kernels/xdot/xdot_3232.hpp | 5 +++-- src/database/kernels/xdot/xdot_64.hpp | 3 ++- src/database/kernels/xdot/xdot_6464.hpp | 3 ++- src/database/kernels/xgemm/xgemm_32.hpp | 1 + src/database/kernels/xgemm/xgemm_3232.hpp | 3 ++- src/database/kernels/xgemm/xgemm_64.hpp | 3 ++- src/database/kernels/xgemm/xgemm_6464.hpp | 3 ++- src/database/kernels/xgemm_direct/xgemm_direct_32.hpp | 3 ++- src/database/kernels/xgemm_direct/xgemm_direct_3232.hpp | 5 +++-- src/database/kernels/xgemm_direct/xgemm_direct_64.hpp | 3 ++- src/database/kernels/xgemm_direct/xgemm_direct_6464.hpp | 3 ++- src/database/kernels/xgemv/xgemv_32.hpp | 3 ++- src/database/kernels/xgemv/xgemv_3232.hpp | 1 + src/database/kernels/xgemv/xgemv_64.hpp | 1 + src/database/kernels/xgemv_fast/xgemv_fast_32.hpp | 3 ++- src/database/kernels/xgemv_fast/xgemv_fast_64.hpp | 1 + src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_32.hpp | 1 + src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_64.hpp | 1 + src/database/kernels/xger/xger_32.hpp | 3 ++- src/database/kernels/xger/xger_3232.hpp | 3 ++- src/database/kernels/xger/xger_64.hpp | 3 ++- src/database/kernels/xger/xger_6464.hpp | 1 + 49 files changed, 114 insertions(+), 24 deletions(-) (limited to 'src/database') diff --git a/CHANGELOG b/CHANGELOG index 5815e343..9cf9f167 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -6,6 +6,7 @@ Development (next version) - Re-added a local memory size constraint to the tuners - Updated and reorganised the CLBlast documentation - Various minor fixes and enhancements +- Added tuned parameters for various devices (see doc/tuning.md) - Added non-BLAS level-1 routines: * SHAD/DHAD/CHAD/ZHAD/HHAD (Hadamard element-wise vector-vector product) diff --git a/doc/tuning.md b/doc/tuning.md index ebf3cb0c..70b6e589 100644 --- a/doc/tuning.md +++ b/doc/tuning.md @@ -19,6 +19,7 @@ The CLBlast library is already tuned for the most commonly used OpenCL devices a - GeForce GTX 750 - GeForce GTX 750 Ti - GeForce GTX 760 Ti + - GeForce GTX 920MX - GeForce GTX 980 - GeForce GTX 1070 - GeForce GTX 1080 @@ -43,6 +44,7 @@ The CLBlast library is already tuned for the most commonly used OpenCL devices a * Intel GPUs: - HD Graphics 530 - HD Graphics 5500 BroadWell U-Processor GT2 + - HD Graphics 630 - HD Graphics Haswell Ultrabook GT2 Mobile - HD Graphics IvyBridge M GT2 - HD Graphics Skylake ULT GT2 diff --git a/src/database/kernels/copy/copy_32.hpp b/src/database/kernels/copy/copy_32.hpp index f68d329c..1e7111e3 100644 --- a/src/database/kernels/copy/copy_32.hpp +++ b/src/database/kernels/copy/copy_32.hpp @@ -127,9 +127,10 @@ const DatabaseEntry CopySingle = { { kDeviceNameDefault , Params{ 16, 16, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.0", { + { Name{"GeForce 920MX "}, Params{ 8, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 "}, Params{ 32, 8, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 Ti "}, Params{ 16, 32, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 32, 8, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 32, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.2", { { Name{"GeForce GTX 980 "}, Params{ 32, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/copy/copy_3232.hpp b/src/database/kernels/copy/copy_3232.hpp index 62edb1fe..1b2fb4d6 100644 --- a/src/database/kernels/copy/copy_3232.hpp +++ b/src/database/kernels/copy/copy_3232.hpp @@ -122,6 +122,7 @@ const DatabaseEntry CopyComplexSingle = { { kDeviceNameDefault , Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.0", { + { Name{"GeForce 920MX "}, Params{ 8, 32, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 "}, Params{ 16, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 Ti "}, Params{ 16, 32, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 16, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/copy/copy_64.hpp b/src/database/kernels/copy/copy_64.hpp index 53a5dca9..a96db24c 100644 --- a/src/database/kernels/copy/copy_64.hpp +++ b/src/database/kernels/copy/copy_64.hpp @@ -110,6 +110,7 @@ const DatabaseEntry CopyDouble = { { kDeviceNameDefault , Params{ 16, 32, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.0", { + { Name{"GeForce 920MX "}, Params{ 16, 32, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 "}, Params{ 8, 16, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 Ti "}, Params{ 16, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 16, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/copy/copy_6464.hpp b/src/database/kernels/copy/copy_6464.hpp index 806eb659..2328cafc 100644 --- a/src/database/kernels/copy/copy_6464.hpp +++ b/src/database/kernels/copy/copy_6464.hpp @@ -110,6 +110,7 @@ const DatabaseEntry CopyComplexDouble = { { kDeviceNameDefault , Params{ 8, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.0", { + { Name{"GeForce 920MX "}, Params{ 16, 32, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 "}, Params{ 32, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 Ti "}, Params{ 16, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 16, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/invert/invert_32.hpp b/src/database/kernels/invert/invert_32.hpp index ca07e947..94d73da5 100644 --- a/src/database/kernels/invert/invert_32.hpp +++ b/src/database/kernels/invert/invert_32.hpp @@ -20,6 +20,17 @@ const DatabaseEntry InvertSingle = { } }, } }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "SM5.0", { + { Name{"GeForce 920MX "}, Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, { // Default kDeviceTypeAll, "default", { { "default", { diff --git a/src/database/kernels/invert/invert_3232.hpp b/src/database/kernels/invert/invert_3232.hpp index f01b3c7f..a9a14168 100644 --- a/src/database/kernels/invert/invert_3232.hpp +++ b/src/database/kernels/invert/invert_3232.hpp @@ -20,10 +20,21 @@ const DatabaseEntry InvertComplexSingle = { } }, } }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "SM5.0", { + { Name{"GeForce 920MX "}, Params{ 16, 1, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 1, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 16, 1, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, { // Default kDeviceTypeAll, "default", { { "default", { - { kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 1, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/invert/invert_64.hpp b/src/database/kernels/invert/invert_64.hpp index e73120ca..1110b949 100644 --- a/src/database/kernels/invert/invert_64.hpp +++ b/src/database/kernels/invert/invert_64.hpp @@ -12,10 +12,21 @@ namespace database { const DatabaseEntry InvertDouble = { "Invert", Precision::kDouble, {"INTERNAL_BLOCK_SIZE", "LOCALPAD", "TMMWGSX", "TMMWGSY"}, { + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "SM5.0", { + { Name{"GeForce 920MX "}, Params{ 16, 1, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 1, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 16, 1, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, { // Default kDeviceTypeAll, "default", { { "default", { - { kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 1, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/invert/invert_6464.hpp b/src/database/kernels/invert/invert_6464.hpp index 184b956a..26fd7fc8 100644 --- a/src/database/kernels/invert/invert_6464.hpp +++ b/src/database/kernels/invert/invert_6464.hpp @@ -12,6 +12,17 @@ namespace database { const DatabaseEntry InvertComplexDouble = { "Invert", Precision::kComplexDouble, {"INTERNAL_BLOCK_SIZE", "LOCALPAD", "TMMWGSX", "TMMWGSY"}, { + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "SM5.0", { + { Name{"GeForce 920MX "}, Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, { // Default kDeviceTypeAll, "default", { { "default", { diff --git a/src/database/kernels/pad/pad_32.hpp b/src/database/kernels/pad/pad_32.hpp index 2b7a893f..1ae0b1c4 100644 --- a/src/database/kernels/pad/pad_32.hpp +++ b/src/database/kernels/pad/pad_32.hpp @@ -127,9 +127,10 @@ const DatabaseEntry PadSingle = { { kDeviceNameDefault , Params{ 32, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.0", { + { Name{"GeForce 920MX "}, Params{ 16, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 "}, Params{ 32, 16, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 Ti "}, Params{ 16, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 32, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.2", { { Name{"GeForce GTX 980 "}, Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/pad/pad_3232.hpp b/src/database/kernels/pad/pad_3232.hpp index 7f7450e1..f931e798 100644 --- a/src/database/kernels/pad/pad_3232.hpp +++ b/src/database/kernels/pad/pad_3232.hpp @@ -124,6 +124,7 @@ const DatabaseEntry PadComplexSingle = { { kDeviceNameDefault , Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.0", { + { Name{"GeForce 920MX "}, Params{ 16, 16, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 "}, Params{ 32, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 Ti "}, Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 16, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/pad/pad_64.hpp b/src/database/kernels/pad/pad_64.hpp index cf2e0d2e..22ecbbe7 100644 --- a/src/database/kernels/pad/pad_64.hpp +++ b/src/database/kernels/pad/pad_64.hpp @@ -110,6 +110,7 @@ const DatabaseEntry PadDouble = { { kDeviceNameDefault , Params{ 32, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.0", { + { Name{"GeForce 920MX "}, Params{ 8, 16, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 "}, Params{ 32, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 Ti "}, Params{ 8, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 8, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/pad/pad_6464.hpp b/src/database/kernels/pad/pad_6464.hpp index 32f520a8..32567dfc 100644 --- a/src/database/kernels/pad/pad_6464.hpp +++ b/src/database/kernels/pad/pad_6464.hpp @@ -110,6 +110,7 @@ const DatabaseEntry PadComplexDouble = { { kDeviceNameDefault , Params{ 16, 32, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.0", { + { Name{"GeForce 920MX "}, Params{ 16, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 "}, Params{ 8, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 Ti "}, Params{ 16, 32, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 16, 32, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/padtranspose/padtranspose_32.hpp b/src/database/kernels/padtranspose/padtranspose_32.hpp index 9f989984..f1841ff5 100644 --- a/src/database/kernels/padtranspose/padtranspose_32.hpp +++ b/src/database/kernels/padtranspose/padtranspose_32.hpp @@ -126,6 +126,7 @@ const DatabaseEntry PadtransposeSingle = { { kDeviceNameDefault , Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.0", { + { Name{"GeForce 920MX "}, Params{ 0, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 "}, Params{ 1, 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 Ti "}, Params{ 1, 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 1, 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/padtranspose/padtranspose_3232.hpp b/src/database/kernels/padtranspose/padtranspose_3232.hpp index e2e3bf42..eaa83e8c 100644 --- a/src/database/kernels/padtranspose/padtranspose_3232.hpp +++ b/src/database/kernels/padtranspose/padtranspose_3232.hpp @@ -124,6 +124,7 @@ const DatabaseEntry PadtransposeComplexSingle = { { kDeviceNameDefault , Params{ 0, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.0", { + { Name{"GeForce 920MX "}, Params{ 1, 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 "}, Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 Ti "}, Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/padtranspose/padtranspose_64.hpp b/src/database/kernels/padtranspose/padtranspose_64.hpp index 2f6e5100..b3fda42e 100644 --- a/src/database/kernels/padtranspose/padtranspose_64.hpp +++ b/src/database/kernels/padtranspose/padtranspose_64.hpp @@ -110,6 +110,7 @@ const DatabaseEntry PadtransposeDouble = { { kDeviceNameDefault , Params{ 0, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.0", { + { Name{"GeForce 920MX "}, Params{ 1, 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 "}, Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 Ti "}, Params{ 1, 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 1, 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/padtranspose/padtranspose_6464.hpp b/src/database/kernels/padtranspose/padtranspose_6464.hpp index 3a9067d9..1456a59a 100644 --- a/src/database/kernels/padtranspose/padtranspose_6464.hpp +++ b/src/database/kernels/padtranspose/padtranspose_6464.hpp @@ -110,6 +110,7 @@ const DatabaseEntry PadtransposeComplexDouble = { { kDeviceNameDefault , Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.0", { + { Name{"GeForce 920MX "}, Params{ 0, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 "}, Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 Ti "}, Params{ 1, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/transpose/transpose_32.hpp b/src/database/kernels/transpose/transpose_32.hpp index c98fcf9b..5907174a 100644 --- a/src/database/kernels/transpose/transpose_32.hpp +++ b/src/database/kernels/transpose/transpose_32.hpp @@ -127,6 +127,7 @@ const DatabaseEntry TransposeSingle = { { kDeviceNameDefault , Params{ 8, 1, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.0", { + { Name{"GeForce 920MX "}, Params{ 4, 0, 1, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 "}, Params{ 4, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 Ti "}, Params{ 32, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 32, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/transpose/transpose_3232.hpp b/src/database/kernels/transpose/transpose_3232.hpp index 73089fe9..9f220db3 100644 --- a/src/database/kernels/transpose/transpose_3232.hpp +++ b/src/database/kernels/transpose/transpose_3232.hpp @@ -116,6 +116,7 @@ const DatabaseEntry TransposeComplexSingle = { { kDeviceNameDefault , Params{ 16, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.0", { + { Name{"GeForce 920MX "}, Params{ 32, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 "}, Params{ 16, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 Ti "}, Params{ 16, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 16, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/transpose/transpose_64.hpp b/src/database/kernels/transpose/transpose_64.hpp index f216b2d4..8f75268e 100644 --- a/src/database/kernels/transpose/transpose_64.hpp +++ b/src/database/kernels/transpose/transpose_64.hpp @@ -110,6 +110,7 @@ const DatabaseEntry TransposeDouble = { { kDeviceNameDefault , Params{ 16, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.0", { + { Name{"GeForce 920MX "}, Params{ 32, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 "}, Params{ 16, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 Ti "}, Params{ 32, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 32, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/transpose/transpose_6464.hpp b/src/database/kernels/transpose/transpose_6464.hpp index d810bcbb..6d073257 100644 --- a/src/database/kernels/transpose/transpose_6464.hpp +++ b/src/database/kernels/transpose/transpose_6464.hpp @@ -102,6 +102,7 @@ const DatabaseEntry TransposeComplexDouble = { { kDeviceNameDefault , Params{ 16, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.0", { + { Name{"GeForce 920MX "}, Params{ 8, 1, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 "}, Params{ 16, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 Ti "}, Params{ 16, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 16, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xaxpy/xaxpy_32.hpp b/src/database/kernels/xaxpy/xaxpy_32.hpp index 20801de4..407ec71d 100644 --- a/src/database/kernels/xaxpy/xaxpy_32.hpp +++ b/src/database/kernels/xaxpy/xaxpy_32.hpp @@ -127,6 +127,7 @@ const DatabaseEntry XaxpySingle = { { kDeviceNameDefault , Params{ 4, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.0", { + { Name{"GeForce 920MX "}, Params{ 2, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 Ti "}, Params{ 2, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 2, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xaxpy/xaxpy_3232.hpp b/src/database/kernels/xaxpy/xaxpy_3232.hpp index 29ac046f..70728fcb 100644 --- a/src/database/kernels/xaxpy/xaxpy_3232.hpp +++ b/src/database/kernels/xaxpy/xaxpy_3232.hpp @@ -124,9 +124,10 @@ const DatabaseEntry XaxpyComplexSingle = { { kDeviceNameDefault , Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.0", { + { Name{"GeForce 920MX "}, Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 "}, Params{ 1, 512, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 Ti "}, Params{ 1, 512, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 1, 512, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 1024, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.2", { { Name{"GeForce GTX 980 "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xaxpy/xaxpy_64.hpp b/src/database/kernels/xaxpy/xaxpy_64.hpp index 69547315..c61d78a1 100644 --- a/src/database/kernels/xaxpy/xaxpy_64.hpp +++ b/src/database/kernels/xaxpy/xaxpy_64.hpp @@ -110,9 +110,10 @@ const DatabaseEntry XaxpyDouble = { { kDeviceNameDefault , Params{ 2, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.0", { + { Name{"GeForce 920MX "}, Params{ 2, 512, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 "}, Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 Ti "}, Params{ 1, 256, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 512, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.2", { { Name{"GeForce GTX 980 "}, Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xaxpy/xaxpy_6464.hpp b/src/database/kernels/xaxpy/xaxpy_6464.hpp index 186a34eb..2dd7c2ef 100644 --- a/src/database/kernels/xaxpy/xaxpy_6464.hpp +++ b/src/database/kernels/xaxpy/xaxpy_6464.hpp @@ -110,6 +110,7 @@ const DatabaseEntry XaxpyComplexDouble = { { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.0", { + { Name{"GeForce 920MX "}, Params{ 2, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 "}, Params{ 1, 1024, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 Ti "}, Params{ 1, 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 1, 1024, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xdot/xdot_32.hpp b/src/database/kernels/xdot/xdot_32.hpp index ffb99512..55361578 100644 --- a/src/database/kernels/xdot/xdot_32.hpp +++ b/src/database/kernels/xdot/xdot_32.hpp @@ -110,9 +110,10 @@ const DatabaseEntry XdotSingle = { { kDeviceNameDefault , Params{ 1024, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.0", { + { Name{"GeForce 920MX "}, Params{ 64, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 "}, Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 Ti "}, Params{ 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.2", { { Name{"GeForce GTX 980 "}, Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xdot/xdot_3232.hpp b/src/database/kernels/xdot/xdot_3232.hpp index 265fd33c..a962c288 100644 --- a/src/database/kernels/xdot/xdot_3232.hpp +++ b/src/database/kernels/xdot/xdot_3232.hpp @@ -107,9 +107,10 @@ const DatabaseEntry XdotComplexSingle = { { kDeviceNameDefault , Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.0", { + { Name{"GeForce 920MX "}, Params{ 128, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 "}, Params{ 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 Ti "}, Params{ 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.2", { { Name{"GeForce GTX 980 "}, Params{ 256, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -124,7 +125,7 @@ const DatabaseEntry XdotComplexSingle = { { kDeviceNameDefault , Params{ 128, 512, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "default", { - { kDeviceNameDefault , Params{ 512, 512, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 512, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/xdot/xdot_64.hpp b/src/database/kernels/xdot/xdot_64.hpp index f3cb2e1e..bbdf5505 100644 --- a/src/database/kernels/xdot/xdot_64.hpp +++ b/src/database/kernels/xdot/xdot_64.hpp @@ -94,9 +94,10 @@ const DatabaseEntry XdotDouble = { { kDeviceNameDefault , Params{ 128, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.0", { + { Name{"GeForce 920MX "}, Params{ 512, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 "}, Params{ 64, 256, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 Ti "}, Params{ 128, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 64, 256, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.2", { { Name{"GeForce GTX 980 "}, Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xdot/xdot_6464.hpp b/src/database/kernels/xdot/xdot_6464.hpp index 83acd1c9..0d97dcd4 100644 --- a/src/database/kernels/xdot/xdot_6464.hpp +++ b/src/database/kernels/xdot/xdot_6464.hpp @@ -94,9 +94,10 @@ const DatabaseEntry XdotComplexDouble = { { kDeviceNameDefault , Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.0", { + { Name{"GeForce 920MX "}, Params{ 64, 512, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 "}, Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 Ti "}, Params{ 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.2", { { Name{"GeForce GTX 980 "}, Params{ 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xgemm/xgemm_32.hpp b/src/database/kernels/xgemm/xgemm_32.hpp index 26302a9f..f15b6def 100644 --- a/src/database/kernels/xgemm/xgemm_32.hpp +++ b/src/database/kernels/xgemm/xgemm_32.hpp @@ -127,6 +127,7 @@ const DatabaseEntry XgemmSingle = { { kDeviceNameDefault , Params{ 16, 8, 32, 16, 64, 32, 16, 64, 1, 0, 1, 0, 2, 2 } }, } }, { "SM5.0", { + { Name{"GeForce 920MX "}, Params{ 16, 2, 32, 8, 128, 8, 16, 128, 1, 1, 1, 0, 4, 1 } }, { Name{"GeForce GTX 750 "}, Params{ 16, 2, 16, 16, 64, 32, 8, 128, 1, 1, 1, 1, 1, 2 } }, { Name{"GeForce GTX 750 Ti "}, Params{ 16, 2, 16, 16, 128, 32, 8, 64, 1, 1, 0, 1, 8, 2 } }, { kDeviceNameDefault , Params{ 32, 2, 8, 8, 64, 32, 32, 64, 0, 0, 0, 0, 2, 1 } }, diff --git a/src/database/kernels/xgemm/xgemm_3232.hpp b/src/database/kernels/xgemm/xgemm_3232.hpp index b9349030..b8555d6a 100644 --- a/src/database/kernels/xgemm/xgemm_3232.hpp +++ b/src/database/kernels/xgemm/xgemm_3232.hpp @@ -124,9 +124,10 @@ const DatabaseEntry XgemmComplexSingle = { { kDeviceNameDefault , Params{ 32, 2, 16, 16, 128, 8, 8, 64, 0, 1, 0, 1, 8, 2 } }, } }, { "SM5.0", { + { Name{"GeForce 920MX "}, Params{ 32, 2, 16, 8, 64, 8, 32, 128, 0, 0, 1, 0, 2, 2 } }, { Name{"GeForce GTX 750 "}, Params{ 16, 8, 16, 16, 64, 16, 16, 64, 1, 1, 1, 0, 2, 2 } }, { Name{"GeForce GTX 750 Ti "}, Params{ 16, 2, 16, 8, 32, 32, 16, 64, 1, 1, 1, 0, 1, 2 } }, - { kDeviceNameDefault , Params{ 16, 2, 16, 8, 32, 16, 16, 64, 1, 1, 1, 0, 1, 2 } }, + { kDeviceNameDefault , Params{ 32, 2, 16, 16, 32, 16, 16, 64, 1, 1, 0, 0, 2, 1 } }, } }, { "SM5.2", { { Name{"GeForce GTX 980 "}, Params{ 32, 8, 32, 32, 64, 16, 16, 64, 1, 1, 1, 0, 2, 1 } }, diff --git a/src/database/kernels/xgemm/xgemm_64.hpp b/src/database/kernels/xgemm/xgemm_64.hpp index a59e5583..541c8d9a 100644 --- a/src/database/kernels/xgemm/xgemm_64.hpp +++ b/src/database/kernels/xgemm/xgemm_64.hpp @@ -110,9 +110,10 @@ const DatabaseEntry XgemmDouble = { { kDeviceNameDefault , Params{ 16, 2, 16, 8, 16, 16, 8, 16, 1, 0, 0, 0, 1, 1 } }, } }, { "SM5.0", { + { Name{"GeForce 920MX "}, Params{ 32, 2, 8, 8, 32, 8, 8, 32, 0, 0, 0, 0, 1, 2 } }, { Name{"GeForce GTX 750 "}, Params{ 32, 8, 16, 32, 64, 16, 8, 128, 0, 0, 0, 1, 2, 1 } }, { Name{"GeForce GTX 750 Ti "}, Params{ 32, 2, 8, 8, 32, 16, 16, 32, 0, 0, 0, 0, 4, 2 } }, - { kDeviceNameDefault , Params{ 32, 2, 8, 8, 32, 16, 8, 32, 0, 0, 0, 0, 2, 1 } }, + { kDeviceNameDefault , Params{ 32, 2, 16, 16, 32, 16, 16, 32, 0, 0, 0, 0, 2, 2 } }, } }, { "SM5.2", { { Name{"GeForce GTX 980 "}, Params{ 32, 8, 16, 8, 64, 32, 32, 128, 0, 0, 1, 0, 2, 4 } }, diff --git a/src/database/kernels/xgemm/xgemm_6464.hpp b/src/database/kernels/xgemm/xgemm_6464.hpp index 37fbc659..9c68a143 100644 --- a/src/database/kernels/xgemm/xgemm_6464.hpp +++ b/src/database/kernels/xgemm/xgemm_6464.hpp @@ -109,9 +109,10 @@ const DatabaseEntry XgemmComplexDouble = { { kDeviceNameDefault , Params{ 32, 2, 8, 8, 32, 16, 16, 64, 0, 0, 0, 0, 4, 1 } }, } }, { "SM5.0", { + { Name{"GeForce 920MX "}, Params{ 32, 2, 8, 8, 64, 8, 8, 32, 0, 0, 0, 0, 1, 2 } }, { Name{"GeForce GTX 750 "}, Params{ 32, 2, 8, 32, 32, 8, 8, 64, 0, 0, 1, 0, 1, 4 } }, { Name{"GeForce GTX 750 Ti "}, Params{ 32, 2, 8, 8, 16, 8, 8, 32, 0, 0, 0, 0, 1, 1 } }, - { kDeviceNameDefault , Params{ 32, 2, 8, 8, 16, 8, 8, 32, 0, 0, 0, 0, 1, 1 } }, + { kDeviceNameDefault , Params{ 32, 2, 16, 16, 32, 8, 8, 32, 0, 0, 0, 0, 2, 2 } }, } }, { "SM5.2", { { Name{"GeForce GTX 980 "}, Params{ 16, 2, 16, 8, 32, 8, 16, 128, 0, 0, 1, 1, 2, 2 } }, diff --git a/src/database/kernels/xgemm_direct/xgemm_direct_32.hpp b/src/database/kernels/xgemm_direct/xgemm_direct_32.hpp index 2b9a0817..071bf40c 100644 --- a/src/database/kernels/xgemm_direct/xgemm_direct_32.hpp +++ b/src/database/kernels/xgemm_direct/xgemm_direct_32.hpp @@ -90,8 +90,9 @@ const DatabaseEntry XgemmDirectSingle = { { kDeviceNameDefault , Params{ 2, 8, 8, 16, 16, 1, 1, 4, 2, 32, 0, 0, 0, 0 } }, } }, { "SM5.0", { + { Name{"GeForce 920MX "}, Params{ 2, 8, 8, 8, 8, 1, 1, 4, 4, 32, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 Ti "}, Params{ 2, 8, 8, 8, 8, 1, 1, 4, 2, 32, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 4, 2, 32, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 4, 4, 32, 0, 0, 0, 0 } }, } }, { "SM6.1", { { Name{"GeForce GTX 1080 "}, Params{ 16, 16, 8, 16, 8, 1, 1, 1, 1, 32, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xgemm_direct/xgemm_direct_3232.hpp b/src/database/kernels/xgemm_direct/xgemm_direct_3232.hpp index ff83f3e5..8607517a 100644 --- a/src/database/kernels/xgemm_direct/xgemm_direct_3232.hpp +++ b/src/database/kernels/xgemm_direct/xgemm_direct_3232.hpp @@ -83,8 +83,9 @@ const DatabaseEntry XgemmDirectComplexSingle = { { kDeviceNameDefault , Params{ 2, 8, 8, 16, 16, 1, 1, 1, 1, 16, 0, 0, 0, 0 } }, } }, { "SM5.0", { + { Name{"GeForce 920MX "}, Params{ 8, 16, 16, 16, 8, 1, 1, 1, 2, 32, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 Ti "}, Params{ 16, 8, 8, 16, 8, 1, 1, 2, 1, 16, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 16, 8, 8, 16, 8, 1, 1, 2, 1, 16, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 1, 2, 16, 0, 0, 0, 0 } }, } }, { "SM6.1", { { Name{"GeForce GTX 1080 "}, Params{ 8, 8, 16, 16, 8, 1, 1, 2, 2, 32, 0, 0, 0, 0 } }, @@ -93,7 +94,7 @@ const DatabaseEntry XgemmDirectComplexSingle = { { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 1, 2, 16, 0, 0, 0, 0 } }, } }, { "default", { - { kDeviceNameDefault , Params{ 2, 16, 16, 8, 8, 1, 1, 1, 1, 16, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 1, 2, 16, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/xgemm_direct/xgemm_direct_64.hpp b/src/database/kernels/xgemm_direct/xgemm_direct_64.hpp index cceefa56..67c3b558 100644 --- a/src/database/kernels/xgemm_direct/xgemm_direct_64.hpp +++ b/src/database/kernels/xgemm_direct/xgemm_direct_64.hpp @@ -77,8 +77,9 @@ const DatabaseEntry XgemmDirectDouble = { { kDeviceNameDefault , Params{ 8, 16, 16, 16, 8, 1, 0, 1, 1, 16, 0, 0, 0, 0 } }, } }, { "SM5.0", { + { Name{"GeForce 920MX "}, Params{ 2, 8, 8, 8, 8, 1, 1, 4, 4, 32, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 Ti "}, Params{ 2, 8, 8, 8, 8, 1, 1, 2, 4, 32, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 2, 4, 32, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 2, 1, 32, 0, 0, 0, 0 } }, } }, { "SM6.1", { { Name{"GeForce GTX 1080 "}, Params{ 2, 16, 16, 8, 8, 1, 1, 1, 2, 16, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xgemm_direct/xgemm_direct_6464.hpp b/src/database/kernels/xgemm_direct/xgemm_direct_6464.hpp index d124735f..3c045a1a 100644 --- a/src/database/kernels/xgemm_direct/xgemm_direct_6464.hpp +++ b/src/database/kernels/xgemm_direct/xgemm_direct_6464.hpp @@ -77,8 +77,9 @@ const DatabaseEntry XgemmDirectComplexDouble = { { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 1, 1, 8, 0, 0, 0, 0 } }, } }, { "SM5.0", { + { Name{"GeForce 920MX "}, Params{ 2, 16, 16, 8, 8, 1, 1, 1, 1, 16, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 Ti "}, Params{ 2, 32, 32, 8, 8, 1, 1, 1, 1, 32, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 2, 32, 32, 8, 8, 1, 1, 1, 1, 32, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 16, 16, 8, 8, 1, 1, 1, 1, 16, 0, 0, 0, 0 } }, } }, { "SM6.1", { { Name{"GeForce GTX 1080 "}, Params{ 2, 16, 16, 8, 8, 1, 1, 1, 1, 16, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xgemv/xgemv_32.hpp b/src/database/kernels/xgemv/xgemv_32.hpp index 6112d8fb..c29c4ae0 100644 --- a/src/database/kernels/xgemv/xgemv_32.hpp +++ b/src/database/kernels/xgemv/xgemv_32.hpp @@ -125,9 +125,10 @@ const DatabaseEntry XgemvSingle = { { kDeviceNameDefault , Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.0", { + { Name{"GeForce 920MX "}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 "}, Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 Ti "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.2", { { Name{"GeForce GTX 980 "}, Params{ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xgemv/xgemv_3232.hpp b/src/database/kernels/xgemv/xgemv_3232.hpp index e8b557ef..4f3b39ce 100644 --- a/src/database/kernels/xgemv/xgemv_3232.hpp +++ b/src/database/kernels/xgemv/xgemv_3232.hpp @@ -120,6 +120,7 @@ const DatabaseEntry XgemvComplexSingle = { { kDeviceNameDefault , Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.0", { + { Name{"GeForce 920MX "}, Params{ 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 "}, Params{ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 Ti "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xgemv/xgemv_64.hpp b/src/database/kernels/xgemv/xgemv_64.hpp index f2d03f5a..7572de01 100644 --- a/src/database/kernels/xgemv/xgemv_64.hpp +++ b/src/database/kernels/xgemv/xgemv_64.hpp @@ -101,6 +101,7 @@ const DatabaseEntry XgemvDouble = { { kDeviceNameDefault , Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.0", { + { Name{"GeForce 920MX "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 "}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 Ti "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xgemv_fast/xgemv_fast_32.hpp b/src/database/kernels/xgemv_fast/xgemv_fast_32.hpp index 255720cd..798d7f90 100644 --- a/src/database/kernels/xgemv_fast/xgemv_fast_32.hpp +++ b/src/database/kernels/xgemv_fast/xgemv_fast_32.hpp @@ -125,9 +125,10 @@ const DatabaseEntry XgemvFastSingle = { { kDeviceNameDefault , Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.0", { + { Name{"GeForce 920MX "}, Params{ 2, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 "}, Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 Ti "}, Params{ 2, 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 2, 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.2", { { Name{"GeForce GTX 980 "}, Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xgemv_fast/xgemv_fast_64.hpp b/src/database/kernels/xgemv_fast/xgemv_fast_64.hpp index dd9e90ad..7c5afbc6 100644 --- a/src/database/kernels/xgemv_fast/xgemv_fast_64.hpp +++ b/src/database/kernels/xgemv_fast/xgemv_fast_64.hpp @@ -101,6 +101,7 @@ const DatabaseEntry XgemvFastDouble = { { kDeviceNameDefault , Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.0", { + { Name{"GeForce 920MX "}, Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 "}, Params{ 2, 256, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 Ti "}, Params{ 1, 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 2, 256, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_32.hpp b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_32.hpp index a77d8272..7321773a 100644 --- a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_32.hpp +++ b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_32.hpp @@ -92,6 +92,7 @@ const DatabaseEntry XgemvFastRotSingle = { { kDeviceNameDefault , Params{ 1, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.0", { + { Name{"GeForce 920MX "}, Params{ 8, 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 Ti "}, Params{ 8, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 8, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, diff --git a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_64.hpp b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_64.hpp index 29a7a219..eeef2fce 100644 --- a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_64.hpp +++ b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_64.hpp @@ -70,6 +70,7 @@ const DatabaseEntry XgemvFastRotDouble = { { kDeviceNameDefault , Params{ 1, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.0", { + { Name{"GeForce 920MX "}, Params{ 8, 128, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 Ti "}, Params{ 4, 32, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 4, 32, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, diff --git a/src/database/kernels/xger/xger_32.hpp b/src/database/kernels/xger/xger_32.hpp index a795cead..10625d72 100644 --- a/src/database/kernels/xger/xger_32.hpp +++ b/src/database/kernels/xger/xger_32.hpp @@ -115,9 +115,10 @@ const DatabaseEntry XgerSingle = { { kDeviceNameDefault , Params{ 32, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.0", { + { Name{"GeForce 920MX "}, Params{ 64, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 "}, Params{ 64, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 Ti "}, Params{ 64, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 128, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM6.1", { { Name{"GeForce GTX 1070 "}, Params{ 512, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xger/xger_3232.hpp b/src/database/kernels/xger/xger_3232.hpp index 914cce22..0db11d63 100644 --- a/src/database/kernels/xger/xger_3232.hpp +++ b/src/database/kernels/xger/xger_3232.hpp @@ -112,9 +112,10 @@ const DatabaseEntry XgerComplexSingle = { { kDeviceNameDefault , Params{ 16, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.0", { + { Name{"GeForce 920MX "}, Params{ 64, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 "}, Params{ 32, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 Ti "}, Params{ 32, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 32, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM6.1", { { Name{"GeForce GTX 1070 "}, Params{ 16, 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xger/xger_64.hpp b/src/database/kernels/xger/xger_64.hpp index 5fc0d389..b1fcbfed 100644 --- a/src/database/kernels/xger/xger_64.hpp +++ b/src/database/kernels/xger/xger_64.hpp @@ -99,9 +99,10 @@ const DatabaseEntry XgerDouble = { { kDeviceNameDefault , Params{ 32, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.0", { + { Name{"GeForce 920MX "}, Params{ 32, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 "}, Params{ 256, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 Ti "}, Params{ 32, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 16, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM6.1", { { Name{"GeForce GTX 1070 "}, Params{ 32, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xger/xger_6464.hpp b/src/database/kernels/xger/xger_6464.hpp index 50216f4e..92793f92 100644 --- a/src/database/kernels/xger/xger_6464.hpp +++ b/src/database/kernels/xger/xger_6464.hpp @@ -99,6 +99,7 @@ const DatabaseEntry XgerComplexDouble = { { kDeviceNameDefault , Params{ 32, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.0", { + { Name{"GeForce 920MX "}, Params{ 256, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 "}, Params{ 8, 32, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 750 Ti "}, Params{ 32, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 32, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, -- cgit v1.2.3 From 16f7f496838cfb5faa0892dd95226a36dc9507a4 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sat, 7 Apr 2018 17:48:25 +0200 Subject: Added tuning results for NVIDIA GeForce 970 --- doc/tuning.md | 1 + src/database/kernels/copy/copy_32.hpp | 5 +++-- src/database/kernels/copy/copy_3232.hpp | 1 + src/database/kernels/copy/copy_64.hpp | 1 + src/database/kernels/copy/copy_6464.hpp | 3 ++- src/database/kernels/gemm_routine/gemm_routine_32.hpp | 8 ++++++-- src/database/kernels/invert/invert_32.hpp | 4 ++++ src/database/kernels/invert/invert_3232.hpp | 4 ++++ src/database/kernels/invert/invert_64.hpp | 4 ++++ src/database/kernels/invert/invert_6464.hpp | 8 ++++++-- src/database/kernels/pad/pad_32.hpp | 3 ++- src/database/kernels/pad/pad_3232.hpp | 1 + src/database/kernels/pad/pad_64.hpp | 3 ++- src/database/kernels/pad/pad_6464.hpp | 1 + src/database/kernels/padtranspose/padtranspose_32.hpp | 3 ++- src/database/kernels/padtranspose/padtranspose_3232.hpp | 1 + src/database/kernels/padtranspose/padtranspose_64.hpp | 1 + src/database/kernels/padtranspose/padtranspose_6464.hpp | 3 ++- src/database/kernels/transpose/transpose_32.hpp | 3 ++- src/database/kernels/transpose/transpose_3232.hpp | 1 + src/database/kernels/transpose/transpose_64.hpp | 1 + src/database/kernels/transpose/transpose_6464.hpp | 1 + src/database/kernels/xaxpy/xaxpy_32.hpp | 5 +++-- src/database/kernels/xaxpy/xaxpy_3232.hpp | 5 +++-- src/database/kernels/xaxpy/xaxpy_64.hpp | 5 +++-- src/database/kernels/xaxpy/xaxpy_6464.hpp | 3 ++- src/database/kernels/xdot/xdot_32.hpp | 3 ++- src/database/kernels/xdot/xdot_3232.hpp | 5 +++-- src/database/kernels/xdot/xdot_64.hpp | 3 ++- src/database/kernels/xdot/xdot_6464.hpp | 3 ++- src/database/kernels/xgemm/xgemm_32.hpp | 3 ++- src/database/kernels/xgemm/xgemm_3232.hpp | 3 ++- src/database/kernels/xgemm/xgemm_64.hpp | 5 +++-- src/database/kernels/xgemm/xgemm_6464.hpp | 3 ++- src/database/kernels/xgemm_direct/xgemm_direct_32.hpp | 4 ++++ src/database/kernels/xgemm_direct/xgemm_direct_3232.hpp | 4 ++++ src/database/kernels/xgemm_direct/xgemm_direct_64.hpp | 4 ++++ src/database/kernels/xgemm_direct/xgemm_direct_6464.hpp | 4 ++++ src/database/kernels/xgemv/xgemv_32.hpp | 1 + src/database/kernels/xgemv/xgemv_3232.hpp | 4 ++++ src/database/kernels/xgemv/xgemv_64.hpp | 1 + src/database/kernels/xgemv/xgemv_6464.hpp | 4 ++++ src/database/kernels/xgemv_fast/xgemv_fast_32.hpp | 1 + src/database/kernels/xgemv_fast/xgemv_fast_64.hpp | 3 ++- src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_32.hpp | 4 ++++ src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_64.hpp | 4 ++++ src/database/kernels/xger/xger_32.hpp | 4 ++++ src/database/kernels/xger/xger_3232.hpp | 6 +++++- src/database/kernels/xger/xger_64.hpp | 6 +++++- src/database/kernels/xger/xger_6464.hpp | 4 ++++ 50 files changed, 133 insertions(+), 32 deletions(-) (limited to 'src/database') diff --git a/doc/tuning.md b/doc/tuning.md index 70b6e589..60ad2cc7 100644 --- a/doc/tuning.md +++ b/doc/tuning.md @@ -20,6 +20,7 @@ The CLBlast library is already tuned for the most commonly used OpenCL devices a - GeForce GTX 750 Ti - GeForce GTX 760 Ti - GeForce GTX 920MX + - GeForce GTX 970 - GeForce GTX 980 - GeForce GTX 1070 - GeForce GTX 1080 diff --git a/src/database/kernels/copy/copy_32.hpp b/src/database/kernels/copy/copy_32.hpp index 1e7111e3..bc7de3d9 100644 --- a/src/database/kernels/copy/copy_32.hpp +++ b/src/database/kernels/copy/copy_32.hpp @@ -133,9 +133,10 @@ const DatabaseEntry CopySingle = { { kDeviceNameDefault , Params{ 32, 32, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.2", { + { Name{"GeForce GTX 970 "}, Params{ 16, 32, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 980 "}, Params{ 32, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX TITAN X "}, Params{ 32, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 32, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 32, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM6.1", { { Name{"GeForce GTX 1070 "}, Params{ 8, 16, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -145,7 +146,7 @@ const DatabaseEntry CopySingle = { { kDeviceNameDefault , Params{ 8, 32, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "default", { - { kDeviceNameDefault , Params{ 8, 32, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 32, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/copy/copy_3232.hpp b/src/database/kernels/copy/copy_3232.hpp index 1b2fb4d6..9c01a5df 100644 --- a/src/database/kernels/copy/copy_3232.hpp +++ b/src/database/kernels/copy/copy_3232.hpp @@ -128,6 +128,7 @@ const DatabaseEntry CopyComplexSingle = { { kDeviceNameDefault , Params{ 16, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.2", { + { Name{"GeForce GTX 970 "}, Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 980 "}, Params{ 8, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX TITAN X "}, Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/copy/copy_64.hpp b/src/database/kernels/copy/copy_64.hpp index a96db24c..c6dce74c 100644 --- a/src/database/kernels/copy/copy_64.hpp +++ b/src/database/kernels/copy/copy_64.hpp @@ -116,6 +116,7 @@ const DatabaseEntry CopyDouble = { { kDeviceNameDefault , Params{ 16, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.2", { + { Name{"GeForce GTX 970 "}, Params{ 32, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 980 "}, Params{ 32, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX TITAN X "}, Params{ 32, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 32, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/copy/copy_6464.hpp b/src/database/kernels/copy/copy_6464.hpp index 2328cafc..32f70103 100644 --- a/src/database/kernels/copy/copy_6464.hpp +++ b/src/database/kernels/copy/copy_6464.hpp @@ -116,6 +116,7 @@ const DatabaseEntry CopyComplexDouble = { { kDeviceNameDefault , Params{ 16, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.2", { + { Name{"GeForce GTX 970 "}, Params{ 8, 16, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 980 "}, Params{ 8, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX TITAN X "}, Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 32, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -128,7 +129,7 @@ const DatabaseEntry CopyComplexDouble = { { kDeviceNameDefault , Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "default", { - { kDeviceNameDefault , Params{ 8, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/gemm_routine/gemm_routine_32.hpp b/src/database/kernels/gemm_routine/gemm_routine_32.hpp index 0056bf7c..479b314e 100644 --- a/src/database/kernels/gemm_routine/gemm_routine_32.hpp +++ b/src/database/kernels/gemm_routine/gemm_routine_32.hpp @@ -42,20 +42,24 @@ const DatabaseEntry GemmRoutineSingle = { { Name{"GeForce GTX 750 Ti "}, Params{ 768, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 768, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "SM5.2", { + { Name{"GeForce GTX 970 "}, Params{ 1984, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1984, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "SM6.1", { { Name{"GeForce GTX 1080 Ti "}, Params{ 1792, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 1664, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 1664, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "default", { - { kDeviceNameDefault , Params{ 1344, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1472, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, { // Default kDeviceTypeAll, "default", { { "default", { - { kDeviceNameDefault , Params{ 768, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 896, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/invert/invert_32.hpp b/src/database/kernels/invert/invert_32.hpp index 94d73da5..82deab6f 100644 --- a/src/database/kernels/invert/invert_32.hpp +++ b/src/database/kernels/invert/invert_32.hpp @@ -26,6 +26,10 @@ const DatabaseEntry InvertSingle = { { Name{"GeForce 920MX "}, Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "SM5.2", { + { Name{"GeForce GTX 970 "}, Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "default", { { kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, diff --git a/src/database/kernels/invert/invert_3232.hpp b/src/database/kernels/invert/invert_3232.hpp index a9a14168..fec3e8d3 100644 --- a/src/database/kernels/invert/invert_3232.hpp +++ b/src/database/kernels/invert/invert_3232.hpp @@ -26,6 +26,10 @@ const DatabaseEntry InvertComplexSingle = { { Name{"GeForce 920MX "}, Params{ 16, 1, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 16, 1, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "SM5.2", { + { Name{"GeForce GTX 970 "}, Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "default", { { kDeviceNameDefault , Params{ 16, 1, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, diff --git a/src/database/kernels/invert/invert_64.hpp b/src/database/kernels/invert/invert_64.hpp index 1110b949..843c96a7 100644 --- a/src/database/kernels/invert/invert_64.hpp +++ b/src/database/kernels/invert/invert_64.hpp @@ -18,6 +18,10 @@ const DatabaseEntry InvertDouble = { { Name{"GeForce 920MX "}, Params{ 16, 1, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 16, 1, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "SM5.2", { + { Name{"GeForce GTX 970 "}, Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "default", { { kDeviceNameDefault , Params{ 16, 1, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, diff --git a/src/database/kernels/invert/invert_6464.hpp b/src/database/kernels/invert/invert_6464.hpp index 26fd7fc8..1ee2e7a5 100644 --- a/src/database/kernels/invert/invert_6464.hpp +++ b/src/database/kernels/invert/invert_6464.hpp @@ -18,15 +18,19 @@ const DatabaseEntry InvertComplexDouble = { { Name{"GeForce 920MX "}, Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "SM5.2", { + { Name{"GeForce GTX 970 "}, Params{ 16, 1, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 1, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "default", { - { kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 1, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, { // Default kDeviceTypeAll, "default", { { "default", { - { kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 1, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/pad/pad_32.hpp b/src/database/kernels/pad/pad_32.hpp index 1ae0b1c4..a6a902c1 100644 --- a/src/database/kernels/pad/pad_32.hpp +++ b/src/database/kernels/pad/pad_32.hpp @@ -133,9 +133,10 @@ const DatabaseEntry PadSingle = { { kDeviceNameDefault , Params{ 32, 8, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.2", { + { Name{"GeForce GTX 970 "}, Params{ 16, 32, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 980 "}, Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX TITAN X "}, Params{ 16, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM6.1", { { Name{"GeForce GTX 1070 "}, Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/pad/pad_3232.hpp b/src/database/kernels/pad/pad_3232.hpp index f931e798..cfdf5841 100644 --- a/src/database/kernels/pad/pad_3232.hpp +++ b/src/database/kernels/pad/pad_3232.hpp @@ -130,6 +130,7 @@ const DatabaseEntry PadComplexSingle = { { kDeviceNameDefault , Params{ 16, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.2", { + { Name{"GeForce GTX 970 "}, Params{ 16, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 980 "}, Params{ 16, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX TITAN X "}, Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 16, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/pad/pad_64.hpp b/src/database/kernels/pad/pad_64.hpp index 22ecbbe7..c5f4918a 100644 --- a/src/database/kernels/pad/pad_64.hpp +++ b/src/database/kernels/pad/pad_64.hpp @@ -116,6 +116,7 @@ const DatabaseEntry PadDouble = { { kDeviceNameDefault , Params{ 8, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.2", { + { Name{"GeForce GTX 970 "}, Params{ 16, 16, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 980 "}, Params{ 8, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX TITAN X "}, Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -128,7 +129,7 @@ const DatabaseEntry PadDouble = { { kDeviceNameDefault , Params{ 32, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "default", { - { kDeviceNameDefault , Params{ 32, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/pad/pad_6464.hpp b/src/database/kernels/pad/pad_6464.hpp index 32567dfc..457f46c2 100644 --- a/src/database/kernels/pad/pad_6464.hpp +++ b/src/database/kernels/pad/pad_6464.hpp @@ -116,6 +116,7 @@ const DatabaseEntry PadComplexDouble = { { kDeviceNameDefault , Params{ 16, 32, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.2", { + { Name{"GeForce GTX 970 "}, Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 980 "}, Params{ 16, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX TITAN X "}, Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/padtranspose/padtranspose_32.hpp b/src/database/kernels/padtranspose/padtranspose_32.hpp index f1841ff5..1d39400d 100644 --- a/src/database/kernels/padtranspose/padtranspose_32.hpp +++ b/src/database/kernels/padtranspose/padtranspose_32.hpp @@ -132,9 +132,10 @@ const DatabaseEntry PadtransposeSingle = { { kDeviceNameDefault , Params{ 1, 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.2", { + { Name{"GeForce GTX 970 "}, Params{ 1, 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 980 "}, Params{ 0, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX TITAN X "}, Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM6.1", { { Name{"GeForce GTX 1070 "}, Params{ 0, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/padtranspose/padtranspose_3232.hpp b/src/database/kernels/padtranspose/padtranspose_3232.hpp index eaa83e8c..8f6c8783 100644 --- a/src/database/kernels/padtranspose/padtranspose_3232.hpp +++ b/src/database/kernels/padtranspose/padtranspose_3232.hpp @@ -130,6 +130,7 @@ const DatabaseEntry PadtransposeComplexSingle = { { kDeviceNameDefault , Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.2", { + { Name{"GeForce GTX 970 "}, Params{ 0, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 980 "}, Params{ 0, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX TITAN X "}, Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/padtranspose/padtranspose_64.hpp b/src/database/kernels/padtranspose/padtranspose_64.hpp index b3fda42e..837c7d6c 100644 --- a/src/database/kernels/padtranspose/padtranspose_64.hpp +++ b/src/database/kernels/padtranspose/padtranspose_64.hpp @@ -116,6 +116,7 @@ const DatabaseEntry PadtransposeDouble = { { kDeviceNameDefault , Params{ 1, 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.2", { + { Name{"GeForce GTX 970 "}, Params{ 1, 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 980 "}, Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX TITAN X "}, Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/padtranspose/padtranspose_6464.hpp b/src/database/kernels/padtranspose/padtranspose_6464.hpp index 1456a59a..e9d6ca27 100644 --- a/src/database/kernels/padtranspose/padtranspose_6464.hpp +++ b/src/database/kernels/padtranspose/padtranspose_6464.hpp @@ -116,9 +116,10 @@ const DatabaseEntry PadtransposeComplexDouble = { { kDeviceNameDefault , Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.2", { + { Name{"GeForce GTX 970 "}, Params{ 0, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 980 "}, Params{ 0, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX TITAN X "}, Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 0, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM6.1", { { Name{"GeForce GTX 1070 "}, Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/transpose/transpose_32.hpp b/src/database/kernels/transpose/transpose_32.hpp index 5907174a..55bab07b 100644 --- a/src/database/kernels/transpose/transpose_32.hpp +++ b/src/database/kernels/transpose/transpose_32.hpp @@ -133,9 +133,10 @@ const DatabaseEntry TransposeSingle = { { kDeviceNameDefault , Params{ 32, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.2", { + { Name{"GeForce GTX 970 "}, Params{ 4, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 980 "}, Params{ 16, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX TITAN X "}, Params{ 16, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 32, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM6.1", { { Name{"GeForce GTX 1070 "}, Params{ 8, 0, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/transpose/transpose_3232.hpp b/src/database/kernels/transpose/transpose_3232.hpp index 9f220db3..2680c48b 100644 --- a/src/database/kernels/transpose/transpose_3232.hpp +++ b/src/database/kernels/transpose/transpose_3232.hpp @@ -122,6 +122,7 @@ const DatabaseEntry TransposeComplexSingle = { { kDeviceNameDefault , Params{ 16, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.2", { + { Name{"GeForce GTX 970 "}, Params{ 32, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 980 "}, Params{ 16, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX TITAN X "}, Params{ 32, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 32, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/transpose/transpose_64.hpp b/src/database/kernels/transpose/transpose_64.hpp index 8f75268e..ea45ae79 100644 --- a/src/database/kernels/transpose/transpose_64.hpp +++ b/src/database/kernels/transpose/transpose_64.hpp @@ -116,6 +116,7 @@ const DatabaseEntry TransposeDouble = { { kDeviceNameDefault , Params{ 32, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.2", { + { Name{"GeForce GTX 970 "}, Params{ 32, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 980 "}, Params{ 16, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX TITAN X "}, Params{ 32, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 32, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/transpose/transpose_6464.hpp b/src/database/kernels/transpose/transpose_6464.hpp index 6d073257..fe92c8a0 100644 --- a/src/database/kernels/transpose/transpose_6464.hpp +++ b/src/database/kernels/transpose/transpose_6464.hpp @@ -108,6 +108,7 @@ const DatabaseEntry TransposeComplexDouble = { { kDeviceNameDefault , Params{ 16, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.2", { + { Name{"GeForce GTX 970 "}, Params{ 32, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 980 "}, Params{ 32, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX TITAN X "}, Params{ 32, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 32, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xaxpy/xaxpy_32.hpp b/src/database/kernels/xaxpy/xaxpy_32.hpp index 407ec71d..a38f9ef4 100644 --- a/src/database/kernels/xaxpy/xaxpy_32.hpp +++ b/src/database/kernels/xaxpy/xaxpy_32.hpp @@ -133,9 +133,10 @@ const DatabaseEntry XaxpySingle = { { kDeviceNameDefault , Params{ 2, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.2", { + { Name{"GeForce GTX 970 "}, Params{ 1, 1024, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 980 "}, Params{ 1, 1024, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX TITAN X "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 1024, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM6.1", { { Name{"GeForce GTX 1070 "}, Params{ 1, 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -145,7 +146,7 @@ const DatabaseEntry XaxpySingle = { { kDeviceNameDefault , Params{ 1, 512, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "default", { - { kDeviceNameDefault , Params{ 4, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 4, 1024, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/xaxpy/xaxpy_3232.hpp b/src/database/kernels/xaxpy/xaxpy_3232.hpp index 70728fcb..a3723909 100644 --- a/src/database/kernels/xaxpy/xaxpy_3232.hpp +++ b/src/database/kernels/xaxpy/xaxpy_3232.hpp @@ -130,9 +130,10 @@ const DatabaseEntry XaxpyComplexSingle = { { kDeviceNameDefault , Params{ 1, 1024, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.2", { + { Name{"GeForce GTX 970 "}, Params{ 1, 128, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 980 "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX TITAN X "}, Params{ 1, 512, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 1, 512, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 128, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM6.1", { { Name{"GeForce GTX 1070 "}, Params{ 1, 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -142,7 +143,7 @@ const DatabaseEntry XaxpyComplexSingle = { { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "default", { - { kDeviceNameDefault , Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/xaxpy/xaxpy_64.hpp b/src/database/kernels/xaxpy/xaxpy_64.hpp index c61d78a1..17c301aa 100644 --- a/src/database/kernels/xaxpy/xaxpy_64.hpp +++ b/src/database/kernels/xaxpy/xaxpy_64.hpp @@ -116,9 +116,10 @@ const DatabaseEntry XaxpyDouble = { { kDeviceNameDefault , Params{ 2, 512, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.2", { + { Name{"GeForce GTX 970 "}, Params{ 4, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 980 "}, Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX TITAN X "}, Params{ 1, 512, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 1, 512, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM6.1", { { Name{"GeForce GTX 1070 "}, Params{ 1, 64, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -128,7 +129,7 @@ const DatabaseEntry XaxpyDouble = { { kDeviceNameDefault , Params{ 2, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "default", { - { kDeviceNameDefault , Params{ 2, 1024, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/xaxpy/xaxpy_6464.hpp b/src/database/kernels/xaxpy/xaxpy_6464.hpp index 2dd7c2ef..a109db58 100644 --- a/src/database/kernels/xaxpy/xaxpy_6464.hpp +++ b/src/database/kernels/xaxpy/xaxpy_6464.hpp @@ -116,9 +116,10 @@ const DatabaseEntry XaxpyComplexDouble = { { kDeviceNameDefault , Params{ 1, 1024, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.2", { + { Name{"GeForce GTX 970 "}, Params{ 2, 1024, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 980 "}, Params{ 1, 1024, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX TITAN X "}, Params{ 1, 1024, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 1, 1024, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 512, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM6.1", { { Name{"GeForce GTX 1070 "}, Params{ 1, 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xdot/xdot_32.hpp b/src/database/kernels/xdot/xdot_32.hpp index 55361578..68483c9e 100644 --- a/src/database/kernels/xdot/xdot_32.hpp +++ b/src/database/kernels/xdot/xdot_32.hpp @@ -116,9 +116,10 @@ const DatabaseEntry XdotSingle = { { kDeviceNameDefault , Params{ 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.2", { + { Name{"GeForce GTX 970 "}, Params{ 128, 512, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 980 "}, Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX TITAN X "}, Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 512, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM6.1", { { Name{"GeForce GTX 1070 "}, Params{ 128, 1024, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xdot/xdot_3232.hpp b/src/database/kernels/xdot/xdot_3232.hpp index a962c288..224a22e9 100644 --- a/src/database/kernels/xdot/xdot_3232.hpp +++ b/src/database/kernels/xdot/xdot_3232.hpp @@ -113,9 +113,10 @@ const DatabaseEntry XdotComplexSingle = { { kDeviceNameDefault , Params{ 64, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.2", { + { Name{"GeForce GTX 970 "}, Params{ 1024, 256, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 980 "}, Params{ 256, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX TITAN X "}, Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 256, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM6.1", { { Name{"GeForce GTX 1070 "}, Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -125,7 +126,7 @@ const DatabaseEntry XdotComplexSingle = { { kDeviceNameDefault , Params{ 128, 512, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "default", { - { kDeviceNameDefault , Params{ 512, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/xdot/xdot_64.hpp b/src/database/kernels/xdot/xdot_64.hpp index bbdf5505..787d29df 100644 --- a/src/database/kernels/xdot/xdot_64.hpp +++ b/src/database/kernels/xdot/xdot_64.hpp @@ -100,6 +100,7 @@ const DatabaseEntry XdotDouble = { { kDeviceNameDefault , Params{ 128, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.2", { + { Name{"GeForce GTX 970 "}, Params{ 512, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 980 "}, Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX TITAN X "}, Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -112,7 +113,7 @@ const DatabaseEntry XdotDouble = { { kDeviceNameDefault , Params{ 128, 256, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "default", { - { kDeviceNameDefault , Params{ 128, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/xdot/xdot_6464.hpp b/src/database/kernels/xdot/xdot_6464.hpp index 0d97dcd4..74e86f8c 100644 --- a/src/database/kernels/xdot/xdot_6464.hpp +++ b/src/database/kernels/xdot/xdot_6464.hpp @@ -100,6 +100,7 @@ const DatabaseEntry XdotComplexDouble = { { kDeviceNameDefault , Params{ 64, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.2", { + { Name{"GeForce GTX 970 "}, Params{ 128, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 980 "}, Params{ 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX TITAN X "}, Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -112,7 +113,7 @@ const DatabaseEntry XdotComplexDouble = { { kDeviceNameDefault , Params{ 128, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "default", { - { kDeviceNameDefault , Params{ 128, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/xgemm/xgemm_32.hpp b/src/database/kernels/xgemm/xgemm_32.hpp index f15b6def..ef19fa60 100644 --- a/src/database/kernels/xgemm/xgemm_32.hpp +++ b/src/database/kernels/xgemm/xgemm_32.hpp @@ -133,9 +133,10 @@ const DatabaseEntry XgemmSingle = { { kDeviceNameDefault , Params{ 32, 2, 8, 8, 64, 32, 32, 64, 0, 0, 0, 0, 2, 1 } }, } }, { "SM5.2", { + { Name{"GeForce GTX 970 "}, Params{ 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 4, 2 } }, { Name{"GeForce GTX 980 "}, Params{ 16, 2, 16, 16, 64, 16, 8, 128, 1, 1, 1, 0, 4, 8 } }, { Name{"GeForce GTX TITAN X "}, Params{ 16, 2, 8, 16, 128, 8, 8, 128, 1, 1, 1, 1, 4, 8 } }, - { kDeviceNameDefault , Params{ 16, 2, 8, 16, 64, 8, 8, 128, 1, 1, 1, 0, 4, 8 } }, + { kDeviceNameDefault , Params{ 16, 2, 16, 16, 32, 16, 8, 128, 1, 1, 1, 0, 1, 2 } }, } }, { "SM6.1", { { Name{"GeForce GTX 1070 "}, Params{ 16, 2, 32, 16, 128, 32, 8, 128, 1, 1, 1, 0, 4, 1 } }, diff --git a/src/database/kernels/xgemm/xgemm_3232.hpp b/src/database/kernels/xgemm/xgemm_3232.hpp index b8555d6a..a02d1b48 100644 --- a/src/database/kernels/xgemm/xgemm_3232.hpp +++ b/src/database/kernels/xgemm/xgemm_3232.hpp @@ -130,9 +130,10 @@ const DatabaseEntry XgemmComplexSingle = { { kDeviceNameDefault , Params{ 32, 2, 16, 16, 32, 16, 16, 64, 1, 1, 0, 0, 2, 1 } }, } }, { "SM5.2", { + { Name{"GeForce GTX 970 "}, Params{ 16, 2, 8, 16, 64, 8, 8, 64, 0, 0, 1, 0, 1, 4 } }, { Name{"GeForce GTX 980 "}, Params{ 32, 8, 32, 32, 64, 16, 16, 64, 1, 1, 1, 0, 2, 1 } }, { Name{"GeForce GTX TITAN X "}, Params{ 16, 2, 8, 8, 64, 8, 8, 32, 1, 0, 1, 1, 1, 4 } }, - { kDeviceNameDefault , Params{ 16, 2, 8, 8, 64, 8, 8, 32, 1, 0, 1, 0, 1, 1 } }, + { kDeviceNameDefault , Params{ 16, 2, 8, 8, 64, 8, 8, 32, 0, 0, 1, 0, 1, 1 } }, } }, { "SM6.1", { { Name{"GeForce GTX 1070 "}, Params{ 16, 2, 16, 16, 128, 16, 16, 64, 1, 1, 1, 1, 2, 4 } }, diff --git a/src/database/kernels/xgemm/xgemm_64.hpp b/src/database/kernels/xgemm/xgemm_64.hpp index 541c8d9a..f6bb474b 100644 --- a/src/database/kernels/xgemm/xgemm_64.hpp +++ b/src/database/kernels/xgemm/xgemm_64.hpp @@ -116,9 +116,10 @@ const DatabaseEntry XgemmDouble = { { kDeviceNameDefault , Params{ 32, 2, 16, 16, 32, 16, 16, 32, 0, 0, 0, 0, 2, 2 } }, } }, { "SM5.2", { + { Name{"GeForce GTX 970 "}, Params{ 32, 2, 8, 8, 16, 16, 16, 32, 0, 0, 0, 0, 2, 1 } }, { Name{"GeForce GTX 980 "}, Params{ 32, 8, 16, 8, 64, 32, 32, 128, 0, 0, 1, 0, 2, 4 } }, { Name{"GeForce GTX TITAN X "}, Params{ 16, 8, 16, 16, 16, 16, 16, 16, 0, 0, 0, 0, 1, 1 } }, - { kDeviceNameDefault , Params{ 16, 8, 16, 8, 16, 16, 16, 16, 0, 0, 0, 0, 1, 1 } }, + { kDeviceNameDefault , Params{ 16, 2, 8, 8, 16, 16, 16, 16, 0, 0, 0, 0, 1, 1 } }, } }, { "SM6.1", { { Name{"GeForce GTX 1070 "}, Params{ 16, 2, 8, 16, 32, 8, 8, 64, 0, 0, 1, 1, 2, 8 } }, @@ -128,7 +129,7 @@ const DatabaseEntry XgemmDouble = { { kDeviceNameDefault , Params{ 32, 2, 16, 16, 32, 16, 16, 64, 0, 0, 0, 0, 2, 4 } }, } }, { "default", { - { kDeviceNameDefault , Params{ 32, 2, 16, 16, 32, 8, 8, 64, 0, 0, 0, 0, 2, 1 } }, + { kDeviceNameDefault , Params{ 32, 2, 8, 8, 32, 8, 8, 32, 0, 0, 0, 0, 1, 1 } }, } }, } }, diff --git a/src/database/kernels/xgemm/xgemm_6464.hpp b/src/database/kernels/xgemm/xgemm_6464.hpp index 9c68a143..8b7ca852 100644 --- a/src/database/kernels/xgemm/xgemm_6464.hpp +++ b/src/database/kernels/xgemm/xgemm_6464.hpp @@ -115,9 +115,10 @@ const DatabaseEntry XgemmComplexDouble = { { kDeviceNameDefault , Params{ 32, 2, 16, 16, 32, 8, 8, 32, 0, 0, 0, 0, 2, 2 } }, } }, { "SM5.2", { + { Name{"GeForce GTX 970 "}, Params{ 32, 2, 16, 16, 16, 16, 16, 32, 0, 0, 0, 0, 1, 1 } }, { Name{"GeForce GTX 980 "}, Params{ 16, 2, 16, 8, 32, 8, 16, 128, 0, 0, 1, 1, 2, 2 } }, { Name{"GeForce GTX TITAN X "}, Params{ 32, 8, 16, 16, 128, 16, 16, 32, 0, 0, 1, 0, 1, 1 } }, - { kDeviceNameDefault , Params{ 16, 2, 8, 8, 32, 8, 16, 32, 0, 0, 1, 0, 1, 1 } }, + { kDeviceNameDefault , Params{ 16, 2, 8, 8, 16, 8, 16, 32, 0, 0, 0, 0, 1, 1 } }, } }, { "SM6.1", { { Name{"GeForce GTX 1070 "}, Params{ 32, 8, 32, 16, 32, 8, 8, 32, 0, 0, 0, 1, 1, 4 } }, diff --git a/src/database/kernels/xgemm_direct/xgemm_direct_32.hpp b/src/database/kernels/xgemm_direct/xgemm_direct_32.hpp index 071bf40c..1562ff30 100644 --- a/src/database/kernels/xgemm_direct/xgemm_direct_32.hpp +++ b/src/database/kernels/xgemm_direct/xgemm_direct_32.hpp @@ -94,6 +94,10 @@ const DatabaseEntry XgemmDirectSingle = { { Name{"GeForce GTX 750 Ti "}, Params{ 2, 8, 8, 8, 8, 1, 1, 4, 2, 32, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 4, 4, 32, 0, 0, 0, 0 } }, } }, + { "SM5.2", { + { Name{"GeForce GTX 970 "}, Params{ 2, 8, 8, 32, 8, 1, 1, 2, 1, 32, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 8, 8, 32, 8, 1, 1, 2, 1, 32, 0, 0, 0, 0 } }, + } }, { "SM6.1", { { Name{"GeForce GTX 1080 "}, Params{ 16, 16, 8, 16, 8, 1, 1, 1, 1, 32, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 16, 8, 8, 16, 16, 1, 1, 1, 1, 32, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xgemm_direct/xgemm_direct_3232.hpp b/src/database/kernels/xgemm_direct/xgemm_direct_3232.hpp index 8607517a..924b0288 100644 --- a/src/database/kernels/xgemm_direct/xgemm_direct_3232.hpp +++ b/src/database/kernels/xgemm_direct/xgemm_direct_3232.hpp @@ -87,6 +87,10 @@ const DatabaseEntry XgemmDirectComplexSingle = { { Name{"GeForce GTX 750 Ti "}, Params{ 16, 8, 8, 16, 8, 1, 1, 2, 1, 16, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 1, 2, 16, 0, 0, 0, 0 } }, } }, + { "SM5.2", { + { Name{"GeForce GTX 970 "}, Params{ 2, 16, 16, 8, 8, 1, 1, 1, 4, 32, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 16, 16, 8, 8, 1, 1, 1, 4, 32, 0, 0, 0, 0 } }, + } }, { "SM6.1", { { Name{"GeForce GTX 1080 "}, Params{ 8, 8, 16, 16, 8, 1, 1, 2, 2, 32, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 2, 16, 8, 16, 8, 1, 1, 1, 1, 16, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xgemm_direct/xgemm_direct_64.hpp b/src/database/kernels/xgemm_direct/xgemm_direct_64.hpp index 67c3b558..889ee916 100644 --- a/src/database/kernels/xgemm_direct/xgemm_direct_64.hpp +++ b/src/database/kernels/xgemm_direct/xgemm_direct_64.hpp @@ -81,6 +81,10 @@ const DatabaseEntry XgemmDirectDouble = { { Name{"GeForce GTX 750 Ti "}, Params{ 2, 8, 8, 8, 8, 1, 1, 2, 4, 32, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 2, 1, 32, 0, 0, 0, 0 } }, } }, + { "SM5.2", { + { Name{"GeForce GTX 970 "}, Params{ 2, 16, 16, 8, 8, 1, 1, 1, 1, 32, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 16, 16, 8, 8, 1, 1, 1, 1, 32, 0, 0, 0, 0 } }, + } }, { "SM6.1", { { Name{"GeForce GTX 1080 "}, Params{ 2, 16, 16, 8, 8, 1, 1, 1, 2, 16, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 2, 16, 8, 8, 8, 1, 1, 1, 1, 16, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xgemm_direct/xgemm_direct_6464.hpp b/src/database/kernels/xgemm_direct/xgemm_direct_6464.hpp index 3c045a1a..9af9358c 100644 --- a/src/database/kernels/xgemm_direct/xgemm_direct_6464.hpp +++ b/src/database/kernels/xgemm_direct/xgemm_direct_6464.hpp @@ -81,6 +81,10 @@ const DatabaseEntry XgemmDirectComplexDouble = { { Name{"GeForce GTX 750 Ti "}, Params{ 2, 32, 32, 8, 8, 1, 1, 1, 1, 32, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 2, 16, 16, 8, 8, 1, 1, 1, 1, 16, 0, 0, 0, 0 } }, } }, + { "SM5.2", { + { Name{"GeForce GTX 970 "}, Params{ 2, 8, 8, 8, 8, 1, 1, 2, 1, 16, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 2, 1, 16, 0, 0, 0, 0 } }, + } }, { "SM6.1", { { Name{"GeForce GTX 1080 "}, Params{ 2, 16, 16, 8, 8, 1, 1, 1, 1, 16, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 2, 16, 16, 8, 8, 1, 1, 1, 1, 16, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xgemv/xgemv_32.hpp b/src/database/kernels/xgemv/xgemv_32.hpp index c29c4ae0..5d934b71 100644 --- a/src/database/kernels/xgemv/xgemv_32.hpp +++ b/src/database/kernels/xgemv/xgemv_32.hpp @@ -131,6 +131,7 @@ const DatabaseEntry XgemvSingle = { { kDeviceNameDefault , Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.2", { + { Name{"GeForce GTX 970 "}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 980 "}, Params{ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX TITAN X "}, Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xgemv/xgemv_3232.hpp b/src/database/kernels/xgemv/xgemv_3232.hpp index 4f3b39ce..f99194b2 100644 --- a/src/database/kernels/xgemv/xgemv_3232.hpp +++ b/src/database/kernels/xgemv/xgemv_3232.hpp @@ -125,6 +125,10 @@ const DatabaseEntry XgemvComplexSingle = { { Name{"GeForce GTX 750 Ti "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "SM5.2", { + { Name{"GeForce GTX 970 "}, Params{ 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "SM6.1", { { Name{"GeForce GTX 1070 "}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xgemv/xgemv_64.hpp b/src/database/kernels/xgemv/xgemv_64.hpp index 7572de01..61690698 100644 --- a/src/database/kernels/xgemv/xgemv_64.hpp +++ b/src/database/kernels/xgemv/xgemv_64.hpp @@ -107,6 +107,7 @@ const DatabaseEntry XgemvDouble = { { kDeviceNameDefault , Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.2", { + { Name{"GeForce GTX 970 "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 980 "}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX TITAN X "}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xgemv/xgemv_6464.hpp b/src/database/kernels/xgemv/xgemv_6464.hpp index 2accd87d..0cce5e9e 100644 --- a/src/database/kernels/xgemv/xgemv_6464.hpp +++ b/src/database/kernels/xgemv/xgemv_6464.hpp @@ -92,6 +92,10 @@ const DatabaseEntry XgemvComplexDouble = { { Name{"GeForce GTX 760 Ti OEM "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "SM5.2", { + { Name{"GeForce GTX 970 "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "SM6.1", { { Name{"GeForce GTX 1080 Ti "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xgemv_fast/xgemv_fast_32.hpp b/src/database/kernels/xgemv_fast/xgemv_fast_32.hpp index 798d7f90..72ae2656 100644 --- a/src/database/kernels/xgemv_fast/xgemv_fast_32.hpp +++ b/src/database/kernels/xgemv_fast/xgemv_fast_32.hpp @@ -131,6 +131,7 @@ const DatabaseEntry XgemvFastSingle = { { kDeviceNameDefault , Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.2", { + { Name{"GeForce GTX 970 "}, Params{ 2, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 980 "}, Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX TITAN X "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xgemv_fast/xgemv_fast_64.hpp b/src/database/kernels/xgemv_fast/xgemv_fast_64.hpp index 7c5afbc6..ac5f4ac8 100644 --- a/src/database/kernels/xgemv_fast/xgemv_fast_64.hpp +++ b/src/database/kernels/xgemv_fast/xgemv_fast_64.hpp @@ -107,6 +107,7 @@ const DatabaseEntry XgemvFastDouble = { { kDeviceNameDefault , Params{ 2, 256, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM5.2", { + { Name{"GeForce GTX 970 "}, Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 980 "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX TITAN X "}, Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -119,7 +120,7 @@ const DatabaseEntry XgemvFastDouble = { { kDeviceNameDefault , Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "default", { - { kDeviceNameDefault , Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_32.hpp b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_32.hpp index 7321773a..f5d8d250 100644 --- a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_32.hpp +++ b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_32.hpp @@ -96,6 +96,10 @@ const DatabaseEntry XgemvFastRotSingle = { { Name{"GeForce GTX 750 Ti "}, Params{ 8, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 8, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "SM5.2", { + { Name{"GeForce GTX 970 "}, Params{ 8, 16, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 16, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "SM6.1", { { Name{"GeForce GTX 1080 "}, Params{ 8, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 8, 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_64.hpp b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_64.hpp index eeef2fce..8a95ba33 100644 --- a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_64.hpp +++ b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_64.hpp @@ -74,6 +74,10 @@ const DatabaseEntry XgemvFastRotDouble = { { Name{"GeForce GTX 750 Ti "}, Params{ 4, 32, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 4, 32, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "SM5.2", { + { Name{"GeForce GTX 970 "}, Params{ 2, 32, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 32, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "SM6.1", { { Name{"GeForce GTX 1080 "}, Params{ 8, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 8, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xger/xger_32.hpp b/src/database/kernels/xger/xger_32.hpp index 10625d72..3d1ee923 100644 --- a/src/database/kernels/xger/xger_32.hpp +++ b/src/database/kernels/xger/xger_32.hpp @@ -120,6 +120,10 @@ const DatabaseEntry XgerSingle = { { Name{"GeForce GTX 750 Ti "}, Params{ 64, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 64, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "SM5.2", { + { Name{"GeForce GTX 970 "}, Params{ 32, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "SM6.1", { { Name{"GeForce GTX 1070 "}, Params{ 512, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 16, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xger/xger_3232.hpp b/src/database/kernels/xger/xger_3232.hpp index 0db11d63..abed69e3 100644 --- a/src/database/kernels/xger/xger_3232.hpp +++ b/src/database/kernels/xger/xger_3232.hpp @@ -117,6 +117,10 @@ const DatabaseEntry XgerComplexSingle = { { Name{"GeForce GTX 750 Ti "}, Params{ 32, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 128, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "SM5.2", { + { Name{"GeForce GTX 970 "}, Params{ 128, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "SM6.1", { { Name{"GeForce GTX 1070 "}, Params{ 16, 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 32, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -125,7 +129,7 @@ const DatabaseEntry XgerComplexSingle = { { kDeviceNameDefault , Params{ 256, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "default", { - { kDeviceNameDefault , Params{ 128, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/xger/xger_64.hpp b/src/database/kernels/xger/xger_64.hpp index b1fcbfed..6dfd7079 100644 --- a/src/database/kernels/xger/xger_64.hpp +++ b/src/database/kernels/xger/xger_64.hpp @@ -104,6 +104,10 @@ const DatabaseEntry XgerDouble = { { Name{"GeForce GTX 750 Ti "}, Params{ 32, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 16, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "SM5.2", { + { Name{"GeForce GTX 970 "}, Params{ 32, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "SM6.1", { { Name{"GeForce GTX 1070 "}, Params{ 32, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 32, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -112,7 +116,7 @@ const DatabaseEntry XgerDouble = { { kDeviceNameDefault , Params{ 512, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "default", { - { kDeviceNameDefault , Params{ 128, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/xger/xger_6464.hpp b/src/database/kernels/xger/xger_6464.hpp index 92793f92..52555afe 100644 --- a/src/database/kernels/xger/xger_6464.hpp +++ b/src/database/kernels/xger/xger_6464.hpp @@ -104,6 +104,10 @@ const DatabaseEntry XgerComplexDouble = { { Name{"GeForce GTX 750 Ti "}, Params{ 32, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 32, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "SM5.2", { + { Name{"GeForce GTX 970 "}, Params{ 256, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "SM6.1", { { Name{"GeForce GTX 1070 "}, Params{ 8, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, -- cgit v1.2.3