diff options
77 files changed, 412 insertions, 53 deletions
diff --git a/.appveyor.yml b/.appveyor.yml index db99d08a..f07f2971 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -57,8 +57,8 @@ build_script: after_build: - ps: pushd $env:CLBLAST_BUILD - - 7z a CLBlast-1.4.0-Windows-x64.zip .\install_dir\* - - ps: mv CLBlast-1.4.0-Windows-x64.zip $env:APPVEYOR_BUILD_FOLDER + - 7z a CLBlast-1.4.1-Windows-x64.zip .\install_dir\* + - ps: mv CLBlast-1.4.1-Windows-x64.zip $env:APPVEYOR_BUILD_FOLDER artifacts: - path: '*.zip' diff --git a/.travis.yml b/.travis.yml index 17d9048c..fc60826c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -21,7 +21,7 @@ matrix: env: global: - - CLBLAST_VERSION=1.4.0 + - CLBLAST_VERSION=1.4.1 - CLBLAST_ROOT=${TRAVIS_BUILD_DIR}/bin/clblast - CLBLAST_INSTALL=${TRAVIS_BUILD_DIR}/bin/CLBlast-${CLBLAST_VERSION} - CLBLAST_TAR=CLBlast-${CLBLAST_VERSION}-${TRAVIS_OS_NAME}-x64.tar.gz @@ -1,4 +1,14 @@ +Development (next version) +- Added support for shuffle instructions for NVIDIA GPUs (thanks to 'tyler-utah') +- Fixed an issue with AMD GPUs and the new GEMMK == 1 kernel +- Various minor fixes and enhancements + +Version 1.4.1 +- Fixed an access violation under Windows upon releasing the OpenCL program when the driver is already unloaded +- Fixed an issue with double cl_program release in the CLBlast caching system +- Added tuned parameters for various devices (see doc/tuning.md) + Version 1.4.0 - Added Python interface to CLBlast 'PyCLBlast' - Added CLBlast to Ubuntu PPA and macOS Homebrew package managers diff --git a/CMakeLists.txt b/CMakeLists.txt index b1a6de5b..ac775b63 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -22,7 +22,7 @@ endif() project("clblast" C CXX) set(clblast_VERSION_MAJOR 1) set(clblast_VERSION_MINOR 4) -set(clblast_VERSION_PATCH 0) +set(clblast_VERSION_PATCH 1) set(clblast_VERSION "${clblast_VERSION_MAJOR}.${clblast_VERSION_MINOR}.${clblast_VERSION_PATCH}") set(clblast_SOVERSION ${clblast_VERSION_MAJOR}) diff --git a/doc/installation.md b/doc/installation.md index 8606de09..70c69321 100644 --- a/doc/installation.md +++ b/doc/installation.md @@ -25,6 +25,7 @@ The pre-requisites for compilation of CLBlast are kept as minimal as possible. A - Mesa Clover - ARM Mali OpenCL - Vivante OpenCL + - POCL Using pre-built packages diff --git a/doc/tuning.md b/doc/tuning.md index 5cf32ca8..938c3b6a 100644 --- a/doc/tuning.md +++ b/doc/tuning.md @@ -23,6 +23,7 @@ The CLBlast library is already tuned for the most commonly used OpenCL devices a - GeForce GTX 970 - GeForce GTX 980 - GeForce GTX 1070 + - GeForce GTX 1070 Ti - GeForce GTX 1080 - GeForce GTX 1080 Ti - GeForce GTX TITAN @@ -45,6 +46,7 @@ The CLBlast library is already tuned for the most commonly used OpenCL devices a * Intel GPUs: - HD Graphics 530 - HD Graphics 5500 BroadWell U-Processor GT2 + - HD Graphics 6000 BroadWell U-Processor GT3 - HD Graphics 630 - HD Graphics Haswell Ultrabook GT2 Mobile - HD Graphics IvyBridge M GT2 @@ -53,6 +55,7 @@ The CLBlast library is already tuned for the most commonly used OpenCL devices a - Iris Pro * Intel CPUs: - Core i5-4570 + - Core i5-4590S - Core i5-6200U - Core i7-920 - Core i7-2670QM diff --git a/scripts/generator/generator.py b/scripts/generator/generator.py index 819bd01c..414bc64e 100755 --- a/scripts/generator/generator.py +++ b/scripts/generator/generator.py @@ -49,7 +49,7 @@ FILES = [ "/src/clblast_cuda.cpp", "/src/pyclblast/src/pyclblast.pyx" ] -HEADER_LINES = [123, 21, 127, 24, 29, 41, 29, 65, 32, 95, 21, 290] +HEADER_LINES = [123, 21, 127, 24, 29, 45, 29, 65, 32, 95, 21, 290] FOOTER_LINES = [98, 57, 112, 275, 6, 6, 6, 9, 2, 41, 56, 37] HEADER_LINES_DOC = 0 FOOTER_LINES_DOC = 232 diff --git a/src/clpp11.hpp b/src/clpp11.hpp index ce6f39cb..8ac0523f 100644 --- a/src/clpp11.hpp +++ b/src/clpp11.hpp @@ -44,6 +44,7 @@ #include <numeric> // std::accumulate #include <cstring> // std::strlen #include <cstdio> // fprintf, stderr +#include <assert.h> // OpenCL #define CL_USE_DEPRECATED_OPENCL_1_1_APIS // to disable deprecation warnings @@ -355,6 +356,12 @@ class Device { std::string{"."} + std::to_string(GetInfo<cl_uint>(CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV)); } + // Returns if the Nvidia chip is a Volta or later archicture (sm_70 or higher) + bool IsPostNVIDIAVolta() const { + assert(HasExtension("cl_nv_device_attribute_query")); + return GetInfo<cl_uint>(CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV) >= 7; + } + // Retrieves the above extra information (if present) std::string GetExtraInfo() const { if (HasExtension("cl_amd_device_attribute_query")) { return AMDBoardName(); } @@ -463,7 +470,9 @@ class Program { // Clean-up ~Program() { - if (program_) { CheckErrorDtor(clReleaseProgram(program_)); } + #ifndef _MSC_VER // causes an access violation under Windows when the driver is already unloaded + if (program_) { CheckErrorDtor(clReleaseProgram(program_)); } + #endif } // Compiles the device program and checks whether or not there are any warnings/errors diff --git a/src/cupp11.hpp b/src/cupp11.hpp index a1cb1614..ce765844 100644 --- a/src/cupp11.hpp +++ b/src/cupp11.hpp @@ -327,6 +327,11 @@ public: std::string AMDBoardName() const { return ""; } std::string NVIDIAComputeCapability() const { return Capabilities(); } + // Returns if the Nvidia chip is a Volta or later archicture (major version 7 or higher) + bool IsPostNVIDIAVolta() const { + return GetInfo(CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR) >= 7; + } + // Retrieves the above extra information std::string GetExtraInfo() const { return NVIDIAComputeCapability(); } diff --git a/src/database/kernels/copy/copy_32.hpp b/src/database/kernels/copy/copy_32.hpp index 545dee1c..7b7312c9 100644 --- a/src/database/kernels/copy/copy_32.hpp +++ b/src/database/kernels/copy/copy_32.hpp @@ -71,6 +71,7 @@ const DatabaseEntry CopySingle = { { "default", { { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 32, 16, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-4570 CPU @ 3.20GHz "}, Params{ 32, 32, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-4590S CPU @ 3.00GHz "}, Params{ 32, 16, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 32, 16, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 32, 8, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz "}, Params{ 32, 16, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -86,6 +87,7 @@ const DatabaseEntry CopySingle = { { "default", { { Name{"Intel(R) HD Graphics 530 "}, Params{ 8, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 32, 16, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics 6000 BroadWell U-Processor GT"}, Params{ 32, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics 620 "}, Params{ 8, 8, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile "}, Params{ 32, 16, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics IvyBridge M GT2 "}, Params{ 16, 16, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -140,6 +142,7 @@ const DatabaseEntry CopySingle = { } }, { "SM6.1", { { Name{"GeForce GTX 1070 "}, Params{ 8, 16, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1070 Ti "}, Params{ 8, 8, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 8, 32, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 8, 8, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 8, 32, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -161,7 +164,7 @@ const DatabaseEntry CopySingle = { { // Default kDeviceTypeAll, "default", { { "default", { - { kDeviceNameDefault , Params{ 16, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 16, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/copy/copy_3232.hpp b/src/database/kernels/copy/copy_3232.hpp index 72405e49..64d56a7b 100644 --- a/src/database/kernels/copy/copy_3232.hpp +++ b/src/database/kernels/copy/copy_3232.hpp @@ -70,6 +70,7 @@ const DatabaseEntry CopyComplexSingle = { { "default", { { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 32, 16, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-4570 CPU @ 3.20GHz "}, Params{ 32, 16, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-4590S CPU @ 3.00GHz "}, Params{ 16, 8, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 16, 16, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 32, 8, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz "}, Params{ 32, 8, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -135,6 +136,7 @@ const DatabaseEntry CopyComplexSingle = { } }, { "SM6.1", { { Name{"GeForce GTX 1070 "}, Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1070 Ti "}, Params{ 8, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 32, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 16, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 8, 16, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/copy/copy_64.hpp b/src/database/kernels/copy/copy_64.hpp index 2a8f8877..00d44182 100644 --- a/src/database/kernels/copy/copy_64.hpp +++ b/src/database/kernels/copy/copy_64.hpp @@ -62,6 +62,7 @@ const DatabaseEntry CopyDouble = { { "default", { { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 16, 32, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-4570 CPU @ 3.20GHz "}, Params{ 32, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-4590S CPU @ 3.00GHz "}, Params{ 32, 8, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 16, 8, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 32, 16, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz "}, Params{ 16, 32, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -123,6 +124,7 @@ const DatabaseEntry CopyDouble = { } }, { "SM6.1", { { Name{"GeForce GTX 1070 "}, Params{ 8, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1070 Ti "}, Params{ 8, 8, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 8, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 8, 8, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 8, 8, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/copy/copy_6464.hpp b/src/database/kernels/copy/copy_6464.hpp index 6ec45e40..732b8143 100644 --- a/src/database/kernels/copy/copy_6464.hpp +++ b/src/database/kernels/copy/copy_6464.hpp @@ -62,6 +62,7 @@ const DatabaseEntry CopyComplexDouble = { { "default", { { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 8, 8, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-4570 CPU @ 3.20GHz "}, Params{ 32, 8, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-4590S CPU @ 3.00GHz "}, Params{ 16, 8, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 32, 8, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 32, 16, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz "}, Params{ 32, 32, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -123,6 +124,7 @@ const DatabaseEntry CopyComplexDouble = { } }, { "SM6.1", { { Name{"GeForce GTX 1070 "}, Params{ 8, 32, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1070 Ti "}, Params{ 8, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 8, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 32, 32, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 8, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/gemm_routine/gemm_routine_32.hpp b/src/database/kernels/gemm_routine/gemm_routine_32.hpp index 26d8af42..b685d4bc 100644 --- a/src/database/kernels/gemm_routine/gemm_routine_32.hpp +++ b/src/database/kernels/gemm_routine/gemm_routine_32.hpp @@ -23,16 +23,18 @@ const DatabaseEntry GemmRoutineSingle = { { // Intel CPUs kDeviceTypeCPU, "Intel", { { "default", { + { Name{"Intel(R) Core(TM) i5-4590S CPU @ 3.00GHz "}, Params{ 320, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 384, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 384, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 320, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, { // Intel GPUs kDeviceTypeGPU, "Intel", { { "default", { + { Name{"Intel(R) HD Graphics 6000 BroadWell U-Processor GT"}, Params{ 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 192, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 192, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, @@ -47,9 +49,10 @@ const DatabaseEntry GemmRoutineSingle = { { kDeviceNameDefault , Params{ 1984, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM6.1", { + { Name{"GeForce GTX 1070 Ti "}, Params{ 1472, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 1792, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 1664, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 1664, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1536, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "default", { { kDeviceNameDefault , Params{ 1472, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -59,7 +62,7 @@ const DatabaseEntry GemmRoutineSingle = { { // Default kDeviceTypeAll, "default", { { "default", { - { kDeviceNameDefault , Params{ 896, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 768, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/gemm_routine/gemm_routine_3232.hpp b/src/database/kernels/gemm_routine/gemm_routine_3232.hpp index 0318d47b..c72db083 100644 --- a/src/database/kernels/gemm_routine/gemm_routine_3232.hpp +++ b/src/database/kernels/gemm_routine/gemm_routine_3232.hpp @@ -15,8 +15,9 @@ const DatabaseEntry GemmRoutineComplexSingle = { { // Intel CPUs kDeviceTypeCPU, "Intel", { { "default", { + { Name{"Intel(R) Core(TM) i5-4590S CPU @ 3.00GHz "}, Params{ 1984, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 256, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 256, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1152, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, @@ -35,9 +36,10 @@ const DatabaseEntry GemmRoutineComplexSingle = { { kDeviceNameDefault , Params{ 768, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM6.1", { + { Name{"GeForce GTX 1070 Ti "}, Params{ 1024, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 1408, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 1472, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 1408, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1280, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "default", { { kDeviceNameDefault , Params{ 1152, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -47,7 +49,7 @@ const DatabaseEntry GemmRoutineComplexSingle = { { // Default kDeviceTypeAll, "default", { { "default", { - { kDeviceNameDefault , Params{ 768, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1024, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/gemm_routine/gemm_routine_64.hpp b/src/database/kernels/gemm_routine/gemm_routine_64.hpp index f84912ba..c40470da 100644 --- a/src/database/kernels/gemm_routine/gemm_routine_64.hpp +++ b/src/database/kernels/gemm_routine/gemm_routine_64.hpp @@ -15,8 +15,9 @@ const DatabaseEntry GemmRoutineDouble = { { // Intel CPUs kDeviceTypeCPU, "Intel", { { "default", { + { Name{"Intel(R) Core(TM) i5-4590S CPU @ 3.00GHz "}, Params{ 1984, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 320, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 320, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1152, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, @@ -27,19 +28,20 @@ const DatabaseEntry GemmRoutineDouble = { { kDeviceNameDefault , Params{ 320, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM6.1", { + { Name{"GeForce GTX 1070 Ti "}, Params{ 576, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 1024, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 832, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 896, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 768, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "default", { - { kDeviceNameDefault , Params{ 704, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 640, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, { // Default kDeviceTypeAll, "default", { { "default", { - { kDeviceNameDefault , Params{ 576, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 832, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/gemm_routine/gemm_routine_6464.hpp b/src/database/kernels/gemm_routine/gemm_routine_6464.hpp index 8e84594c..c42267e1 100644 --- a/src/database/kernels/gemm_routine/gemm_routine_6464.hpp +++ b/src/database/kernels/gemm_routine/gemm_routine_6464.hpp @@ -15,8 +15,9 @@ const DatabaseEntry GemmRoutineComplexDouble = { { // Intel CPUs kDeviceTypeCPU, "Intel", { { "default", { + { Name{"Intel(R) Core(TM) i5-4590S CPU @ 3.00GHz "}, Params{ 1984, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 1536, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 1536, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1600, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, @@ -27,9 +28,10 @@ const DatabaseEntry GemmRoutineComplexDouble = { { kDeviceNameDefault , Params{ 320, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM6.1", { + { Name{"GeForce GTX 1070 Ti "}, Params{ 512, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 768, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 576, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 640, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 576, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "default", { { kDeviceNameDefault , Params{ 512, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -39,7 +41,7 @@ const DatabaseEntry GemmRoutineComplexDouble = { { // Default kDeviceTypeAll, "default", { { "default", { - { kDeviceNameDefault , Params{ 512, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 768, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/invert/invert_32.hpp b/src/database/kernels/invert/invert_32.hpp index 0342bf78..b3f9143a 100644 --- a/src/database/kernels/invert/invert_32.hpp +++ b/src/database/kernels/invert/invert_32.hpp @@ -12,9 +12,18 @@ namespace database { const DatabaseEntry InvertSingle = { "Invert", Precision::kSingle, {"INTERNAL_BLOCK_SIZE", "LOCALPAD", "TMMWGSX", "TMMWGSY"}, { + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "default", { + { Name{"Intel(R) Core(TM) i5-4590S CPU @ 3.00GHz "}, Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, { // Intel GPUs kDeviceTypeGPU, "Intel", { { "default", { + { Name{"Intel(R) HD Graphics 6000 BroadWell U-Processor GT"}, Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, @@ -30,6 +39,10 @@ const DatabaseEntry InvertSingle = { { Name{"GeForce GTX 970 "}, Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "SM6.1", { + { Name{"GeForce GTX 1070 Ti "}, Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "default", { { kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, diff --git a/src/database/kernels/invert/invert_3232.hpp b/src/database/kernels/invert/invert_3232.hpp index 07a6ce5e..11ea895d 100644 --- a/src/database/kernels/invert/invert_3232.hpp +++ b/src/database/kernels/invert/invert_3232.hpp @@ -12,6 +12,14 @@ namespace database { const DatabaseEntry InvertComplexSingle = { "Invert", Precision::kComplexSingle, {"INTERNAL_BLOCK_SIZE", "LOCALPAD", "TMMWGSX", "TMMWGSY"}, { + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "default", { + { Name{"Intel(R) Core(TM) i5-4590S CPU @ 3.00GHz "}, Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, { // Intel GPUs kDeviceTypeGPU, "Intel", { { "default", { @@ -30,6 +38,10 @@ const DatabaseEntry InvertComplexSingle = { { Name{"GeForce GTX 970 "}, Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "SM6.1", { + { Name{"GeForce GTX 1070 Ti "}, Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "default", { { kDeviceNameDefault , Params{ 16, 1, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, @@ -38,7 +50,7 @@ const DatabaseEntry InvertComplexSingle = { { // Default kDeviceTypeAll, "default", { { "default", { - { kDeviceNameDefault , Params{ 16, 1, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/invert/invert_64.hpp b/src/database/kernels/invert/invert_64.hpp index 13a98b40..2ece5668 100644 --- a/src/database/kernels/invert/invert_64.hpp +++ b/src/database/kernels/invert/invert_64.hpp @@ -12,6 +12,14 @@ namespace database { const DatabaseEntry InvertDouble = { "Invert", Precision::kDouble, {"INTERNAL_BLOCK_SIZE", "LOCALPAD", "TMMWGSX", "TMMWGSY"}, { + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "default", { + { Name{"Intel(R) Core(TM) i5-4590S CPU @ 3.00GHz "}, Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { { "SM5.0", { @@ -22,6 +30,10 @@ const DatabaseEntry InvertDouble = { { Name{"GeForce GTX 970 "}, Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "SM6.1", { + { Name{"GeForce GTX 1070 Ti "}, Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "default", { { kDeviceNameDefault , Params{ 16, 1, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, diff --git a/src/database/kernels/invert/invert_6464.hpp b/src/database/kernels/invert/invert_6464.hpp index ef01cd78..60795ad6 100644 --- a/src/database/kernels/invert/invert_6464.hpp +++ b/src/database/kernels/invert/invert_6464.hpp @@ -12,6 +12,14 @@ namespace database { const DatabaseEntry InvertComplexDouble = { "Invert", Precision::kComplexDouble, {"INTERNAL_BLOCK_SIZE", "LOCALPAD", "TMMWGSX", "TMMWGSY"}, { + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "default", { + { Name{"Intel(R) Core(TM) i5-4590S CPU @ 3.00GHz "}, Params{ 16, 1, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 1, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { { "SM5.0", { @@ -22,6 +30,10 @@ const DatabaseEntry InvertComplexDouble = { { Name{"GeForce GTX 970 "}, Params{ 16, 1, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 16, 1, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "SM6.1", { + { Name{"GeForce GTX 1070 Ti "}, Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "default", { { kDeviceNameDefault , Params{ 16, 1, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, diff --git a/src/database/kernels/pad/pad_32.hpp b/src/database/kernels/pad/pad_32.hpp index 69a141da..92fd2db7 100644 --- a/src/database/kernels/pad/pad_32.hpp +++ b/src/database/kernels/pad/pad_32.hpp @@ -71,6 +71,7 @@ const DatabaseEntry PadSingle = { { "default", { { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 32, 32, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-4570 CPU @ 3.20GHz "}, Params{ 32, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-4590S CPU @ 3.00GHz "}, Params{ 32, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 32, 16, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 32, 8, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz "}, Params{ 16, 32, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -86,6 +87,7 @@ const DatabaseEntry PadSingle = { { "default", { { Name{"Intel(R) HD Graphics 530 "}, Params{ 32, 8, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 32, 8, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics 6000 BroadWell U-Processor GT"}, Params{ 16, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics 620 "}, Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile "}, Params{ 16, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics IvyBridge M GT2 "}, Params{ 32, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -140,6 +142,7 @@ const DatabaseEntry PadSingle = { } }, { "SM6.1", { { Name{"GeForce GTX 1070 "}, Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1070 Ti "}, Params{ 8, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 16, 32, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 16, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/pad/pad_3232.hpp b/src/database/kernels/pad/pad_3232.hpp index a517362a..88ae08a3 100644 --- a/src/database/kernels/pad/pad_3232.hpp +++ b/src/database/kernels/pad/pad_3232.hpp @@ -70,6 +70,7 @@ const DatabaseEntry PadComplexSingle = { { "default", { { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 32, 8, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-4570 CPU @ 3.20GHz "}, Params{ 32, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-4590S CPU @ 3.00GHz "}, Params{ 16, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 32, 8, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 32, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz "}, Params{ 32, 32, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -137,6 +138,7 @@ const DatabaseEntry PadComplexSingle = { } }, { "SM6.1", { { Name{"GeForce GTX 1070 "}, Params{ 8, 32, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1070 Ti "}, Params{ 8, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 32, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 32, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 32, 32, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/pad/pad_64.hpp b/src/database/kernels/pad/pad_64.hpp index d9f5624b..a2e9c2bb 100644 --- a/src/database/kernels/pad/pad_64.hpp +++ b/src/database/kernels/pad/pad_64.hpp @@ -62,6 +62,7 @@ const DatabaseEntry PadDouble = { { "default", { { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 32, 8, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-4570 CPU @ 3.20GHz "}, Params{ 32, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-4590S CPU @ 3.00GHz "}, Params{ 16, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 32, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 32, 16, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz "}, Params{ 32, 32, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -123,6 +124,7 @@ const DatabaseEntry PadDouble = { } }, { "SM6.1", { { Name{"GeForce GTX 1070 "}, Params{ 8, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1070 Ti "}, Params{ 8, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 32, 32, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 32, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 8, 32, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/pad/pad_6464.hpp b/src/database/kernels/pad/pad_6464.hpp index 0e4e6cc1..3d6fdaf6 100644 --- a/src/database/kernels/pad/pad_6464.hpp +++ b/src/database/kernels/pad/pad_6464.hpp @@ -62,13 +62,14 @@ const DatabaseEntry PadComplexDouble = { { "default", { { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 16, 16, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-4570 CPU @ 3.20GHz "}, Params{ 32, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-4590S CPU @ 3.00GHz "}, Params{ 16, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 32, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 32, 8, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz "}, Params{ 16, 32, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 32, 32, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz "}, Params{ 32, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7-6770HQ CPU @ 2.60GHz "}, Params{ 32, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 32, 8, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, @@ -123,6 +124,7 @@ const DatabaseEntry PadComplexDouble = { } }, { "SM6.1", { { Name{"GeForce GTX 1070 "}, Params{ 8, 8, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1070 Ti "}, Params{ 8, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 8, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 8, 16, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 8, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/padtranspose/padtranspose_32.hpp b/src/database/kernels/padtranspose/padtranspose_32.hpp index 1ff1306d..7ac7dd10 100644 --- a/src/database/kernels/padtranspose/padtranspose_32.hpp +++ b/src/database/kernels/padtranspose/padtranspose_32.hpp @@ -71,6 +71,7 @@ const DatabaseEntry PadtransposeSingle = { { "default", { { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 0, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-4570 CPU @ 3.20GHz "}, Params{ 0, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-4590S CPU @ 3.00GHz "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 0, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 0, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz "}, Params{ 0, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -86,6 +87,7 @@ const DatabaseEntry PadtransposeSingle = { { "default", { { Name{"Intel(R) HD Graphics 530 "}, Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 0, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics 6000 BroadWell U-Processor GT"}, Params{ 0, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics 620 "}, Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile "}, Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics IvyBridge M GT2 "}, Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -139,6 +141,7 @@ const DatabaseEntry PadtransposeSingle = { } }, { "SM6.1", { { Name{"GeForce GTX 1070 "}, Params{ 0, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1070 Ti "}, Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/padtranspose/padtranspose_3232.hpp b/src/database/kernels/padtranspose/padtranspose_3232.hpp index 20b9c003..fb0ec5d0 100644 --- a/src/database/kernels/padtranspose/padtranspose_3232.hpp +++ b/src/database/kernels/padtranspose/padtranspose_3232.hpp @@ -70,6 +70,7 @@ const DatabaseEntry PadtransposeComplexSingle = { { "default", { { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 0, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-4570 CPU @ 3.20GHz "}, Params{ 1, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-4590S CPU @ 3.00GHz "}, Params{ 0, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 1, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 0, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz "}, Params{ 0, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -137,6 +138,7 @@ const DatabaseEntry PadtransposeComplexSingle = { } }, { "SM6.1", { { Name{"GeForce GTX 1070 "}, Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1070 Ti "}, Params{ 1, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 0, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 0, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 1, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/padtranspose/padtranspose_64.hpp b/src/database/kernels/padtranspose/padtranspose_64.hpp index ad859fba..b0311dc6 100644 --- a/src/database/kernels/padtranspose/padtranspose_64.hpp +++ b/src/database/kernels/padtranspose/padtranspose_64.hpp @@ -62,6 +62,7 @@ const DatabaseEntry PadtransposeDouble = { { "default", { { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 0, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-4570 CPU @ 3.20GHz "}, Params{ 0, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-4590S CPU @ 3.00GHz "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 1, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 0, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz "}, Params{ 0, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -123,6 +124,7 @@ const DatabaseEntry PadtransposeDouble = { } }, { "SM6.1", { { Name{"GeForce GTX 1070 "}, Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1070 Ti "}, Params{ 0, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 0, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 0, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 0, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/padtranspose/padtranspose_6464.hpp b/src/database/kernels/padtranspose/padtranspose_6464.hpp index 936c0f61..e5b36baa 100644 --- a/src/database/kernels/padtranspose/padtranspose_6464.hpp +++ b/src/database/kernels/padtranspose/padtranspose_6464.hpp @@ -62,6 +62,7 @@ const DatabaseEntry PadtransposeComplexDouble = { { "default", { { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 0, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-4570 CPU @ 3.20GHz "}, Params{ 0, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-4590S CPU @ 3.00GHz "}, Params{ 0, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 1, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz "}, Params{ 1, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -123,6 +124,7 @@ const DatabaseEntry PadtransposeComplexDouble = { } }, { "SM6.1", { { Name{"GeForce GTX 1070 "}, Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1070 Ti "}, Params{ 1, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 1, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 1, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/transpose/transpose_32.hpp b/src/database/kernels/transpose/transpose_32.hpp index 2e9d2390..22ddb2c8 100644 --- a/src/database/kernels/transpose/transpose_32.hpp +++ b/src/database/kernels/transpose/transpose_32.hpp @@ -86,6 +86,7 @@ const DatabaseEntry TransposeSingle = { { "default", { { Name{"Intel(R) HD Graphics 530 "}, Params{ 16, 0, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 16, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics 6000 BroadWell U-Processor GT"}, Params{ 16, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics 620 "}, Params{ 8, 0, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile "}, Params{ 16, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics IvyBridge M GT2 "}, Params{ 8, 1, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -140,6 +141,7 @@ const DatabaseEntry TransposeSingle = { } }, { "SM6.1", { { Name{"GeForce GTX 1070 "}, Params{ 8, 0, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1070 Ti "}, Params{ 4, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 4, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 4, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 8, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/transpose/transpose_3232.hpp b/src/database/kernels/transpose/transpose_3232.hpp index f939693b..45b2c3ff 100644 --- a/src/database/kernels/transpose/transpose_3232.hpp +++ b/src/database/kernels/transpose/transpose_3232.hpp @@ -70,6 +70,7 @@ const DatabaseEntry TransposeComplexSingle = { { "default", { { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 4, 0, 1, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-4570 CPU @ 3.20GHz "}, Params{ 8, 1, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-4590S CPU @ 3.00GHz "}, Params{ 64, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 8, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 4, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz "}, Params{ 4, 1, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -129,10 +130,11 @@ const DatabaseEntry TransposeComplexSingle = { } }, { "SM6.1", { { Name{"GeForce GTX 1070 "}, Params{ 16, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1070 Ti "}, Params{ 8, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 16, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 16, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 8, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 32, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "default", { { kDeviceNameDefault , Params{ 16, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/transpose/transpose_64.hpp b/src/database/kernels/transpose/transpose_64.hpp index e3582867..098295ce 100644 --- a/src/database/kernels/transpose/transpose_64.hpp +++ b/src/database/kernels/transpose/transpose_64.hpp @@ -62,6 +62,7 @@ const DatabaseEntry TransposeDouble = { { "default", { { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 4, 1, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-4570 CPU @ 3.20GHz "}, Params{ 8, 0, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-4590S CPU @ 3.00GHz "}, Params{ 4, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 4, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 4, 1, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz "}, Params{ 4, 1, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -123,6 +124,7 @@ const DatabaseEntry TransposeDouble = { } }, { "SM6.1", { { Name{"GeForce GTX 1070 "}, Params{ 8, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1070 Ti "}, Params{ 8, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 8, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 8, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 16, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/transpose/transpose_6464.hpp b/src/database/kernels/transpose/transpose_6464.hpp index 2e39db45..fb855b00 100644 --- a/src/database/kernels/transpose/transpose_6464.hpp +++ b/src/database/kernels/transpose/transpose_6464.hpp @@ -115,6 +115,7 @@ const DatabaseEntry TransposeComplexDouble = { } }, { "SM6.1", { { Name{"GeForce GTX 1070 "}, Params{ 8, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1070 Ti "}, Params{ 8, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 8, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 8, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 8, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/trsv_routine/trsv_routine_32.hpp b/src/database/kernels/trsv_routine/trsv_routine_32.hpp index 60540555..2ee82b71 100644 --- a/src/database/kernels/trsv_routine/trsv_routine_32.hpp +++ b/src/database/kernels/trsv_routine/trsv_routine_32.hpp @@ -12,14 +12,34 @@ namespace database { const DatabaseEntry TrsvRoutineSingle = { "TrsvRoutine", Precision::kSingle, {"TRSV_BLOCK_SIZE"}, { + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "default", { + { Name{"Intel(R) Core(TM) i5-4590S CPU @ 3.00GHz "}, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, { // Intel GPUs kDeviceTypeGPU, "Intel", { { "default", { + { Name{"Intel(R) HD Graphics 6000 BroadWell U-Processor GT"}, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "SM6.1", { + { Name{"GeForce GTX 1070 Ti "}, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, { // Default kDeviceTypeAll, "default", { { "default", { diff --git a/src/database/kernels/trsv_routine/trsv_routine_3232.hpp b/src/database/kernels/trsv_routine/trsv_routine_3232.hpp index e40c49bf..6f2f9306 100644 --- a/src/database/kernels/trsv_routine/trsv_routine_3232.hpp +++ b/src/database/kernels/trsv_routine/trsv_routine_3232.hpp @@ -12,6 +12,14 @@ namespace database { const DatabaseEntry TrsvRoutineComplexSingle = { "TrsvRoutine", Precision::kComplexSingle, {"TRSV_BLOCK_SIZE"}, { + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "default", { + { Name{"Intel(R) Core(TM) i5-4590S CPU @ 3.00GHz "}, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, { // Intel GPUs kDeviceTypeGPU, "Intel", { { "default", { @@ -20,6 +28,17 @@ const DatabaseEntry TrsvRoutineComplexSingle = { } }, } }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "SM6.1", { + { Name{"GeForce GTX 1070 Ti "}, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, { // Default kDeviceTypeAll, "default", { { "default", { diff --git a/src/database/kernels/trsv_routine/trsv_routine_64.hpp b/src/database/kernels/trsv_routine/trsv_routine_64.hpp index 57ce26a5..a1d098eb 100644 --- a/src/database/kernels/trsv_routine/trsv_routine_64.hpp +++ b/src/database/kernels/trsv_routine/trsv_routine_64.hpp @@ -12,6 +12,25 @@ namespace database { const DatabaseEntry TrsvRoutineDouble = { "TrsvRoutine", Precision::kDouble, {"TRSV_BLOCK_SIZE"}, { + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "default", { + { Name{"Intel(R) Core(TM) i5-4590S CPU @ 3.00GHz "}, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "SM6.1", { + { Name{"GeForce GTX 1070 Ti "}, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, { // Default kDeviceTypeAll, "default", { { "default", { diff --git a/src/database/kernels/trsv_routine/trsv_routine_6464.hpp b/src/database/kernels/trsv_routine/trsv_routine_6464.hpp index 89bc1f4d..49cb745d 100644 --- a/src/database/kernels/trsv_routine/trsv_routine_6464.hpp +++ b/src/database/kernels/trsv_routine/trsv_routine_6464.hpp @@ -12,6 +12,25 @@ namespace database { const DatabaseEntry TrsvRoutineComplexDouble = { "TrsvRoutine", Precision::kComplexDouble, {"TRSV_BLOCK_SIZE"}, { + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "default", { + { Name{"Intel(R) Core(TM) i5-4590S CPU @ 3.00GHz "}, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "SM6.1", { + { Name{"GeForce GTX 1070 Ti "}, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + { "default", { + { kDeviceNameDefault , Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, { // Default kDeviceTypeAll, "default", { { "default", { diff --git a/src/database/kernels/xaxpy/xaxpy_32.hpp b/src/database/kernels/xaxpy/xaxpy_32.hpp index 20ab3fdd..d41d4fcf 100644 --- a/src/database/kernels/xaxpy/xaxpy_32.hpp +++ b/src/database/kernels/xaxpy/xaxpy_32.hpp @@ -71,6 +71,7 @@ const DatabaseEntry XaxpySingle = { { "default", { { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 8, 512, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-4570 CPU @ 3.20GHz "}, Params{ 4, 2048, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-4590S CPU @ 3.00GHz "}, Params{ 2, 2048, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 1, 512, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 1, 128, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz "}, Params{ 4, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -86,6 +87,7 @@ const DatabaseEntry XaxpySingle = { { "default", { { Name{"Intel(R) HD Graphics 530 "}, Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics 6000 BroadWell U-Processor GT"}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics 620 "}, Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics IvyBridge M GT2 "}, Params{ 8, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -140,10 +142,11 @@ const DatabaseEntry XaxpySingle = { } }, { "SM6.1", { { Name{"GeForce GTX 1070 "}, Params{ 1, 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1070 Ti "}, Params{ 4, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 4, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 4, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 1, 512, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 4, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "default", { { kDeviceNameDefault , Params{ 4, 1024, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xaxpy/xaxpy_3232.hpp b/src/database/kernels/xaxpy/xaxpy_3232.hpp index 347b2874..4a29da4d 100644 --- a/src/database/kernels/xaxpy/xaxpy_3232.hpp +++ b/src/database/kernels/xaxpy/xaxpy_3232.hpp @@ -70,6 +70,7 @@ const DatabaseEntry XaxpyComplexSingle = { { "default", { { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 4, 1024, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-4570 CPU @ 3.20GHz "}, Params{ 4, 1024, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-4590S CPU @ 3.00GHz "}, Params{ 2, 512, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 4, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 4, 1024, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz "}, Params{ 1, 1024, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -137,6 +138,7 @@ const DatabaseEntry XaxpyComplexSingle = { } }, { "SM6.1", { { Name{"GeForce GTX 1070 "}, Params{ 1, 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1070 Ti "}, Params{ 2, 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 2, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 2, 512, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xaxpy/xaxpy_64.hpp b/src/database/kernels/xaxpy/xaxpy_64.hpp index 3c7b4f6e..85c89fa2 100644 --- a/src/database/kernels/xaxpy/xaxpy_64.hpp +++ b/src/database/kernels/xaxpy/xaxpy_64.hpp @@ -62,6 +62,7 @@ const DatabaseEntry XaxpyDouble = { { "default", { { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 4, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-4570 CPU @ 3.20GHz "}, Params{ 1, 2048, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-4590S CPU @ 3.00GHz "}, Params{ 2, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 1, 1024, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 2, 1024, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz "}, Params{ 8, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -123,10 +124,11 @@ const DatabaseEntry XaxpyDouble = { } }, { "SM6.1", { { Name{"GeForce GTX 1070 "}, Params{ 1, 64, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1070 Ti "}, Params{ 1, 128, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 1, 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 2, 512, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 2, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "default", { { kDeviceNameDefault , Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xaxpy/xaxpy_6464.hpp b/src/database/kernels/xaxpy/xaxpy_6464.hpp index 10e69026..e54746de 100644 --- a/src/database/kernels/xaxpy/xaxpy_6464.hpp +++ b/src/database/kernels/xaxpy/xaxpy_6464.hpp @@ -62,6 +62,7 @@ const DatabaseEntry XaxpyComplexDouble = { { "default", { { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 4, 1024, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-4570 CPU @ 3.20GHz "}, Params{ 8, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-4590S CPU @ 3.00GHz "}, Params{ 8, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 8, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 8, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz "}, Params{ 8, 512, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -123,6 +124,7 @@ const DatabaseEntry XaxpyComplexDouble = { } }, { "SM6.1", { { Name{"GeForce GTX 1070 "}, Params{ 1, 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1070 Ti "}, Params{ 1, 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 1, 512, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 1, 256, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xdot/xdot_32.hpp b/src/database/kernels/xdot/xdot_32.hpp index 69724f05..a7e685bf 100644 --- a/src/database/kernels/xdot/xdot_32.hpp +++ b/src/database/kernels/xdot/xdot_32.hpp @@ -67,6 +67,7 @@ const DatabaseEntry XdotSingle = { { "default", { { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-4570 CPU @ 3.20GHz "}, Params{ 1024, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-4590S CPU @ 3.00GHz "}, Params{ 1024, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 1024, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 64, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -80,6 +81,7 @@ const DatabaseEntry XdotSingle = { { "default", { { Name{"Intel(R) HD Graphics 530 "}, Params{ 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics 6000 BroadWell U-Processor GT"}, Params{ 512, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics 620 "}, Params{ 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile "}, Params{ 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics IvyBridge M GT2 "}, Params{ 256, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -123,6 +125,7 @@ const DatabaseEntry XdotSingle = { } }, { "SM6.1", { { Name{"GeForce GTX 1070 "}, Params{ 128, 1024, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1070 Ti "}, Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 512, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 256, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 1024, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xdot/xdot_3232.hpp b/src/database/kernels/xdot/xdot_3232.hpp index 10aadac0..ad2cf414 100644 --- a/src/database/kernels/xdot/xdot_3232.hpp +++ b/src/database/kernels/xdot/xdot_3232.hpp @@ -66,6 +66,7 @@ const DatabaseEntry XdotComplexSingle = { { "default", { { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 128, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-4570 CPU @ 3.20GHz "}, Params{ 1024, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-4590S CPU @ 3.00GHz "}, Params{ 1024, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 1024, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -120,6 +121,7 @@ const DatabaseEntry XdotComplexSingle = { } }, { "SM6.1", { { Name{"GeForce GTX 1070 "}, Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1070 Ti "}, Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 128, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 1024, 512, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xdot/xdot_64.hpp b/src/database/kernels/xdot/xdot_64.hpp index 7e37026c..a72f5c0f 100644 --- a/src/database/kernels/xdot/xdot_64.hpp +++ b/src/database/kernels/xdot/xdot_64.hpp @@ -58,6 +58,7 @@ const DatabaseEntry XdotDouble = { { "default", { { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 64, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-4570 CPU @ 3.20GHz "}, Params{ 1024, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-4590S CPU @ 3.00GHz "}, Params{ 1024, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 512, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 64, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -107,6 +108,7 @@ const DatabaseEntry XdotDouble = { } }, { "SM6.1", { { Name{"GeForce GTX 1070 "}, Params{ 128, 512, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1070 Ti "}, Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 128, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 256, 512, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xdot/xdot_6464.hpp b/src/database/kernels/xdot/xdot_6464.hpp index 80f455a4..d062732a 100644 --- a/src/database/kernels/xdot/xdot_6464.hpp +++ b/src/database/kernels/xdot/xdot_6464.hpp @@ -58,6 +58,7 @@ const DatabaseEntry XdotComplexDouble = { { "default", { { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 32, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-4570 CPU @ 3.20GHz "}, Params{ 1024, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-4590S CPU @ 3.00GHz "}, Params{ 1024, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 1024, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 1024, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -107,13 +108,14 @@ const DatabaseEntry XdotComplexDouble = { } }, { "SM6.1", { { Name{"GeForce GTX 1070 "}, Params{ 128, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1070 Ti "}, Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 128, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 128, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 128, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "default", { - { kDeviceNameDefault , Params{ 128, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/xgemm/xgemm_32.hpp b/src/database/kernels/xgemm/xgemm_32.hpp index bc68555c..32358dbc 100644 --- a/src/database/kernels/xgemm/xgemm_32.hpp +++ b/src/database/kernels/xgemm/xgemm_32.hpp @@ -71,6 +71,7 @@ const DatabaseEntry XgemmSingle = { { "default", { { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 0, 1, 16, 2, 8, 8, 128, 16, 8, 128, 0, 1, 1, 1, 1, 8 } }, { Name{"Intel(R) Core(TM) i5-4570 CPU @ 3.20GHz "}, Params{ 0, 1, 32, 2, 32, 16, 64, 32, 8, 64, 0, 1, 1, 0, 1, 1 } }, + { Name{"Intel(R) Core(TM) i5-4590S CPU @ 3.00GHz "}, Params{ 1, 16, 1, 1, 2, 2, 64, 2, 2, 128, 0, 0, 0, 0, 8, 2 } }, { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 0, 1, 32, 8, 32, 32, 64, 32, 16, 64, 1, 1, 1, 0, 2, 2 } }, { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 0, 1, 32, 2, 16, 8, 128, 16, 8, 64, 0, 0, 1, 0, 1, 2 } }, { Name{"Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz "}, Params{ 0, 1, 32, 2, 32, 8, 128, 8, 8, 128, 1, 1, 1, 1, 2, 8 } }, @@ -86,6 +87,7 @@ const DatabaseEntry XgemmSingle = { { "default", { { Name{"Intel(R) HD Graphics 530 "}, Params{ 0, 1, 32, 2, 8, 8, 128, 32, 16, 64, 0, 0, 1, 0, 4, 2 } }, { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 0, 1, 32, 8, 8, 8, 64, 32, 16, 64, 1, 1, 1, 1, 4, 2 } }, + { Name{"Intel(R) HD Graphics 6000 BroadWell U-Processor GT"}, Params{ 1, 4, 1, 1, 8, 8, 64, 8, 8, 64, 0, 0, 0, 0, 4, 4 } }, { Name{"Intel(R) HD Graphics 620 "}, Params{ 0, 1, 32, 2, 32, 8, 64, 16, 16, 128, 0, 0, 0, 1, 1, 2 } }, { Name{"Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile "}, Params{ 0, 1, 16, 2, 16, 8, 32, 8, 16, 128, 1, 1, 1, 1, 2, 4 } }, { Name{"Intel(R) HD Graphics IvyBridge M GT2 "}, Params{ 0, 1, 32, 2, 16, 16, 64, 8, 16, 128, 1, 1, 0, 1, 1, 4 } }, @@ -140,6 +142,7 @@ const DatabaseEntry XgemmSingle = { } }, { "SM6.1", { { Name{"GeForce GTX 1070 "}, Params{ 0, 1, 16, 2, 32, 16, 128, 32, 8, 128, 1, 1, 1, 0, 4, 1 } }, + { Name{"GeForce GTX 1070 Ti "}, Params{ 0, 1, 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 4, 1 } }, { Name{"GeForce GTX 1080 "}, Params{ 0, 1, 32, 2, 16, 8, 64, 8, 8, 64, 1, 1, 1, 1, 4, 8 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 0, 1, 16, 2, 32, 16, 64, 16, 8, 128, 1, 1, 0, 1, 2, 8 } }, { Name{"TITAN X (Pascal) "}, Params{ 0, 1, 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 4, 1 } }, diff --git a/src/database/kernels/xgemm/xgemm_3232.hpp b/src/database/kernels/xgemm/xgemm_3232.hpp index febca946..22959347 100644 --- a/src/database/kernels/xgemm/xgemm_3232.hpp +++ b/src/database/kernels/xgemm/xgemm_3232.hpp @@ -70,6 +70,7 @@ const DatabaseEntry XgemmComplexSingle = { { "default", { { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 0, 1, 16, 2, 32, 8, 128, 16, 16, 128, 1, 1, 0, 1, 1, 2 } }, { Name{"Intel(R) Core(TM) i5-4570 CPU @ 3.20GHz "}, Params{ 0, 1, 32, 2, 32, 32, 32, 16, 16, 128, 1, 0, 0, 0, 1, 1 } }, + { Name{"Intel(R) Core(TM) i5-4590S CPU @ 3.00GHz "}, Params{ 1, 16, 1, 1, 4, 4, 128, 2, 2, 64, 0, 0, 0, 0, 4, 8 } }, { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 0, 1, 32, 2, 32, 16, 32, 16, 16, 64, 0, 1, 1, 0, 1, 2 } }, { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 0, 1, 32, 2, 16, 16, 64, 8, 16, 64, 0, 1, 0, 0, 4, 4 } }, { Name{"Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz "}, Params{ 0, 1, 32, 2, 8, 8, 128, 16, 32, 128, 0, 0, 0, 0, 1, 4 } }, @@ -137,20 +138,21 @@ const DatabaseEntry XgemmComplexSingle = { } }, { "SM6.1", { { Name{"GeForce GTX 1070 "}, Params{ 0, 1, 16, 2, 16, 16, 128, 16, 16, 64, 1, 1, 1, 1, 2, 4 } }, + { Name{"GeForce GTX 1070 Ti "}, Params{ 0, 1, 16, 2, 16, 8, 128, 16, 32, 64, 1, 1, 1, 1, 1, 2 } }, { Name{"GeForce GTX 1080 "}, Params{ 0, 1, 16, 2, 32, 16, 64, 32, 8, 64, 1, 1, 0, 0, 1, 2 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 0, 1, 16, 2, 8, 16, 32, 16, 8, 64, 1, 1, 0, 0, 1, 1 } }, { Name{"TITAN X (Pascal) "}, Params{ 0, 1, 32, 2, 32, 32, 64, 8, 8, 32, 1, 1, 0, 0, 2, 4 } }, { kDeviceNameDefault , Params{ 0, 1, 32, 2, 16, 16, 32, 8, 8, 32, 1, 1, 0, 0, 2, 4 } }, } }, { "default", { - { kDeviceNameDefault , Params{ 0, 1, 32, 2, 8, 8, 16, 32, 32, 64, 1, 1, 0, 0, 1, 1 } }, + { kDeviceNameDefault , Params{ 0, 1, 32, 2, 8, 8, 16, 32, 32, 64, 1, 1, 0, 0, 2, 1 } }, } }, } }, { // Default kDeviceTypeAll, "default", { { "default", { - { kDeviceNameDefault , Params{ 0, 1, 32, 2, 16, 16, 16, 8, 8, 16, 1, 1, 0, 0, 1, 2 } }, + { kDeviceNameDefault , Params{ 0, 1, 32, 2, 16, 16, 32, 8, 8, 32, 1, 1, 0, 0, 1, 4 } }, } }, } }, diff --git a/src/database/kernels/xgemm/xgemm_64.hpp b/src/database/kernels/xgemm/xgemm_64.hpp index e599b326..48ee99f8 100644 --- a/src/database/kernels/xgemm/xgemm_64.hpp +++ b/src/database/kernels/xgemm/xgemm_64.hpp @@ -62,6 +62,7 @@ const DatabaseEntry XgemmDouble = { { "default", { { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 0, 1, 16, 2, 32, 8, 128, 16, 16, 128, 1, 1, 1, 1, 2, 8 } }, { Name{"Intel(R) Core(TM) i5-4570 CPU @ 3.20GHz "}, Params{ 0, 1, 32, 2, 16, 8, 128, 16, 8, 128, 1, 0, 1, 1, 1, 8 } }, + { Name{"Intel(R) Core(TM) i5-4590S CPU @ 3.00GHz "}, Params{ 1, 8, 1, 1, 2, 2, 128, 2, 2, 64, 0, 0, 0, 0, 2, 1 } }, { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 0, 1, 32, 2, 32, 16, 128, 16, 16, 64, 0, 1, 1, 0, 1, 2 } }, { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 0, 1, 32, 2, 32, 16, 128, 16, 16, 128, 0, 0, 1, 0, 1, 2 } }, { Name{"Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz "}, Params{ 0, 1, 32, 2, 16, 8, 128, 8, 8, 64, 1, 0, 0, 1, 2, 8 } }, @@ -123,6 +124,7 @@ const DatabaseEntry XgemmDouble = { } }, { "SM6.1", { { Name{"GeForce GTX 1070 "}, Params{ 0, 1, 16, 2, 8, 16, 32, 8, 8, 64, 0, 0, 1, 1, 2, 8 } }, + { Name{"GeForce GTX 1070 Ti "}, Params{ 0, 1, 32, 2, 8, 8, 32, 16, 16, 32, 0, 0, 0, 0, 1, 2 } }, { Name{"GeForce GTX 1080 "}, Params{ 0, 1, 32, 2, 16, 16, 32, 16, 16, 64, 0, 0, 0, 0, 2, 4 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 0, 1, 16, 2, 16, 16, 16, 16, 16, 64, 0, 0, 1, 0, 1, 4 } }, { Name{"TITAN X (Pascal) "}, Params{ 0, 1, 32, 2, 32, 32, 32, 16, 16, 32, 0, 0, 0, 0, 1, 2 } }, @@ -136,7 +138,7 @@ const DatabaseEntry XgemmDouble = { { // Default kDeviceTypeAll, "default", { { "default", { - { kDeviceNameDefault , Params{ 0, 1, 32, 2, 16, 16, 32, 8, 8, 32, 1, 1, 0, 0, 2, 2 } }, + { kDeviceNameDefault , Params{ 0, 1, 32, 2, 16, 16, 32, 8, 8, 32, 1, 1, 0, 0, 2, 4 } }, } }, } }, diff --git a/src/database/kernels/xgemm/xgemm_6464.hpp b/src/database/kernels/xgemm/xgemm_6464.hpp index 290cd9d9..3da772f2 100644 --- a/src/database/kernels/xgemm/xgemm_6464.hpp +++ b/src/database/kernels/xgemm/xgemm_6464.hpp @@ -62,6 +62,7 @@ const DatabaseEntry XgemmComplexDouble = { { "default", { { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 0, 1, 16, 2, 32, 8, 64, 16, 8, 128, 0, 1, 0, 1, 2, 1 } }, { Name{"Intel(R) Core(TM) i5-4570 CPU @ 3.20GHz "}, Params{ 0, 1, 32, 2, 8, 8, 32, 16, 32, 128, 1, 0, 1, 0, 4, 1 } }, + { Name{"Intel(R) Core(TM) i5-4590S CPU @ 3.00GHz "}, Params{ 1, 16, 1, 1, 4, 4, 128, 2, 2, 64, 0, 0, 0, 0, 4, 8 } }, { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 0, 1, 32, 2, 16, 32, 128, 16, 16, 64, 0, 1, 0, 0, 2, 4 } }, { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 0, 1, 32, 2, 16, 32, 128, 16, 8, 32, 0, 1, 0, 0, 4, 1 } }, { Name{"Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz "}, Params{ 0, 1, 32, 2, 8, 8, 128, 8, 16, 128, 0, 0, 0, 1, 1, 8 } }, @@ -122,13 +123,14 @@ const DatabaseEntry XgemmComplexDouble = { } }, { "SM6.1", { { Name{"GeForce GTX 1070 "}, Params{ 0, 1, 32, 8, 32, 16, 32, 8, 8, 32, 0, 0, 0, 1, 1, 4 } }, + { Name{"GeForce GTX 1070 Ti "}, Params{ 0, 1, 32, 2, 8, 8, 16, 16, 16, 32, 0, 0, 0, 0, 1, 1 } }, { Name{"GeForce GTX 1080 "}, Params{ 0, 1, 32, 2, 16, 16, 16, 8, 8, 16, 0, 0, 0, 0, 1, 2 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 0, 1, 32, 2, 16, 16, 16, 16, 16, 16, 0, 0, 0, 0, 1, 1 } }, { Name{"TITAN X (Pascal) "}, Params{ 0, 1, 32, 2, 16, 16, 16, 16, 16, 16, 0, 0, 0, 0, 1, 1 } }, { kDeviceNameDefault , Params{ 0, 1, 32, 2, 32, 32, 32, 32, 32, 64, 0, 0, 0, 0, 1, 2 } }, } }, { "default", { - { kDeviceNameDefault , Params{ 0, 1, 32, 2, 16, 16, 16, 16, 16, 32, 0, 0, 0, 0, 1, 1 } }, + { kDeviceNameDefault , Params{ 0, 1, 32, 2, 16, 16, 16, 8, 8, 16, 0, 0, 0, 0, 1, 1 } }, } }, } }, diff --git a/src/database/kernels/xgemm_direct/xgemm_direct_32.hpp b/src/database/kernels/xgemm_direct/xgemm_direct_32.hpp index b75a75fb..f6ea9523 100644 --- a/src/database/kernels/xgemm_direct/xgemm_direct_32.hpp +++ b/src/database/kernels/xgemm_direct/xgemm_direct_32.hpp @@ -55,6 +55,7 @@ const DatabaseEntry XgemmDirectSingle = { { "default", { { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 2, 8, 8, 8, 8, 0, 0, 1, 8, 64, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-4570 CPU @ 3.20GHz "}, Params{ 8, 16, 16, 16, 16, 0, 0, 1, 1, 64, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-4590S CPU @ 3.00GHz "}, Params{ 8, 8, 8, 8, 8, 0, 0, 8, 4, 64, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 2, 32, 32, 32, 32, 0, 0, 1, 1, 64, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 16, 16, 8, 8, 8, 0, 0, 2, 4, 32, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 2, 8, 8, 8, 8, 0, 0, 2, 2, 64, 0, 0, 0, 0, 0, 0 } }, @@ -66,6 +67,7 @@ const DatabaseEntry XgemmDirectSingle = { { // Intel GPUs kDeviceTypeGPU, "Intel", { { "default", { + { Name{"Intel(R) HD Graphics 6000 BroadWell U-Processor GT"}, Params{ 2, 16, 16, 8, 8, 1, 1, 2, 1, 32, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics 620 "}, Params{ 2, 16, 16, 8, 8, 1, 1, 2, 1, 32, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics IvyBridge M GT2 "}, Params{ 8, 16, 8, 16, 16, 1, 0, 2, 2, 32, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 2, 8, 8, 8, 8, 1, 1, 1, 1, 8, 0, 0, 0, 0, 0, 0 } }, @@ -99,10 +101,11 @@ const DatabaseEntry XgemmDirectSingle = { { kDeviceNameDefault , Params{ 2, 8, 8, 32, 8, 1, 1, 2, 1, 32, 0, 0, 0, 0, 0, 0 } }, } }, { "SM6.1", { + { Name{"GeForce GTX 1070 Ti "}, Params{ 2, 16, 8, 8, 8, 1, 1, 1, 2, 32, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 16, 16, 8, 16, 8, 1, 1, 1, 1, 32, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 16, 8, 8, 16, 16, 1, 1, 1, 1, 32, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 8, 32, 8, 8, 16, 1, 1, 1, 1, 32, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 2, 16, 16, 8, 8, 1, 1, 1, 1, 32, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 4, 2, 32, 0, 0, 0, 0, 0, 0 } }, } }, { "default", { { kDeviceNameDefault , Params{ 2, 8, 8, 16, 16, 1, 1, 4, 2, 32, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xgemm_direct/xgemm_direct_3232.hpp b/src/database/kernels/xgemm_direct/xgemm_direct_3232.hpp index fe0fd030..8f24ee7d 100644 --- a/src/database/kernels/xgemm_direct/xgemm_direct_3232.hpp +++ b/src/database/kernels/xgemm_direct/xgemm_direct_3232.hpp @@ -50,6 +50,7 @@ const DatabaseEntry XgemmDirectComplexSingle = { { "default", { { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 2, 8, 8, 8, 8, 0, 0, 4, 4, 32, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-4570 CPU @ 3.20GHz "}, Params{ 2, 16, 8, 16, 8, 0, 0, 2, 1, 32, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-4590S CPU @ 3.00GHz "}, Params{ 16, 32, 32, 8, 8, 1, 0, 1, 1, 32, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 8, 8, 8, 8, 8, 0, 0, 1, 1, 8, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 2, 16, 16, 8, 8, 1, 1, 1, 4, 32, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 2, 8, 8, 16, 8, 1, 1, 2, 1, 32, 0, 0, 0, 0, 0, 0 } }, @@ -92,6 +93,7 @@ const DatabaseEntry XgemmDirectComplexSingle = { { kDeviceNameDefault , Params{ 2, 16, 16, 8, 8, 1, 1, 1, 4, 32, 0, 0, 0, 0, 0, 0 } }, } }, { "SM6.1", { + { Name{"GeForce GTX 1070 Ti "}, Params{ 2, 8, 8, 16, 8, 1, 1, 1, 1, 16, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 8, 8, 16, 16, 8, 1, 1, 2, 2, 32, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 2, 16, 8, 16, 8, 1, 1, 1, 1, 16, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 2, 16, 16, 8, 8, 1, 1, 1, 1, 16, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xgemm_direct/xgemm_direct_64.hpp b/src/database/kernels/xgemm_direct/xgemm_direct_64.hpp index 6f2e0f1c..9d563e78 100644 --- a/src/database/kernels/xgemm_direct/xgemm_direct_64.hpp +++ b/src/database/kernels/xgemm_direct/xgemm_direct_64.hpp @@ -46,6 +46,7 @@ const DatabaseEntry XgemmDirectDouble = { { "default", { { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 2, 8, 8, 8, 8, 1, 1, 4, 4, 32, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-4570 CPU @ 3.20GHz "}, Params{ 2, 8, 8, 8, 8, 1, 1, 4, 4, 32, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-4590S CPU @ 3.00GHz "}, Params{ 8, 8, 8, 32, 8, 0, 1, 2, 2, 64, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 2, 8, 8, 8, 8, 1, 1, 1, 1, 8, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 8, 8, 8, 8, 8, 0, 0, 1, 4, 32, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 2, 8, 8, 8, 8, 1, 1, 4, 4, 32, 0, 0, 0, 0, 0, 0 } }, @@ -86,6 +87,7 @@ const DatabaseEntry XgemmDirectDouble = { { kDeviceNameDefault , Params{ 2, 16, 16, 8, 8, 1, 1, 1, 1, 32, 0, 0, 0, 0, 0, 0 } }, } }, { "SM6.1", { + { Name{"GeForce GTX 1070 Ti "}, Params{ 2, 8, 8, 16, 8, 1, 1, 1, 1, 16, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 2, 16, 16, 8, 8, 1, 1, 1, 2, 16, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 2, 16, 8, 8, 8, 1, 1, 1, 1, 16, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 2, 8, 8, 8, 8, 1, 1, 1, 1, 16, 0, 0, 0, 0, 0, 0 } }, @@ -99,7 +101,7 @@ const DatabaseEntry XgemmDirectDouble = { { // Default kDeviceTypeAll, "default", { { "default", { - { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 2, 2, 16, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 16, 16, 8, 8, 1, 1, 2, 4, 32, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/xgemm_direct/xgemm_direct_6464.hpp b/src/database/kernels/xgemm_direct/xgemm_direct_6464.hpp index e2aa9f47..af5eeeb5 100644 --- a/src/database/kernels/xgemm_direct/xgemm_direct_6464.hpp +++ b/src/database/kernels/xgemm_direct/xgemm_direct_6464.hpp @@ -46,11 +46,12 @@ const DatabaseEntry XgemmDirectComplexDouble = { { "default", { { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 2, 8, 8, 32, 8, 0, 0, 1, 1, 32, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-4570 CPU @ 3.20GHz "}, Params{ 2, 16, 16, 8, 8, 0, 0, 1, 4, 32, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-4590S CPU @ 3.00GHz "}, Params{ 2, 16, 16, 8, 8, 1, 1, 1, 4, 32, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 2, 8, 8, 8, 8, 0, 0, 2, 2, 32, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 8, 16, 16, 8, 8, 0, 0, 2, 1, 32, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 8, 16, 8, 8, 8, 0, 0, 2, 2, 32, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7-6770HQ CPU @ 2.60GHz "}, Params{ 2, 32, 8, 8, 8, 0, 0, 1, 4, 32, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 1, 2, 16, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 2, 2, 16, 0, 0, 0, 0, 0, 0 } }, } }, } }, @@ -86,6 +87,7 @@ const DatabaseEntry XgemmDirectComplexDouble = { { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 2, 1, 16, 0, 0, 0, 0, 0, 0 } }, } }, { "SM6.1", { + { Name{"GeForce GTX 1070 Ti "}, Params{ 2, 16, 16, 16, 16, 1, 1, 1, 1, 16, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 2, 16, 16, 8, 8, 1, 1, 1, 1, 16, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 2, 16, 16, 8, 8, 1, 1, 1, 1, 16, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 2, 16, 16, 8, 8, 1, 1, 1, 2, 16, 0, 0, 0, 0, 0, 0 } }, @@ -99,7 +101,7 @@ const DatabaseEntry XgemmDirectComplexDouble = { { // Default kDeviceTypeAll, "default", { { "default", { - { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 1, 1, 16, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 2, 1, 16, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/xgemv/xgemv_32.hpp b/src/database/kernels/xgemv/xgemv_32.hpp index fd78e8ff..350c3ada 100644 --- a/src/database/kernels/xgemv/xgemv_32.hpp +++ b/src/database/kernels/xgemv/xgemv_32.hpp @@ -71,6 +71,7 @@ const DatabaseEntry XgemvSingle = { { "default", { { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 128, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-4570 CPU @ 3.20GHz "}, Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-4590S CPU @ 3.00GHz "}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 32, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz "}, Params{ 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -85,6 +86,7 @@ const DatabaseEntry XgemvSingle = { { "default", { { Name{"Intel(R) HD Graphics 530 "}, Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics 6000 BroadWell U-Processor GT"}, Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile "}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics IvyBridge M GT2 "}, Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -138,13 +140,14 @@ const DatabaseEntry XgemvSingle = { } }, { "SM6.1", { { Name{"GeForce GTX 1070 "}, Params{ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1070 Ti "}, Params{ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "default", { - { kDeviceNameDefault , Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/xgemv/xgemv_3232.hpp b/src/database/kernels/xgemv/xgemv_3232.hpp index ae19bf91..dc9a0a88 100644 --- a/src/database/kernels/xgemv/xgemv_3232.hpp +++ b/src/database/kernels/xgemv/xgemv_3232.hpp @@ -70,6 +70,7 @@ const DatabaseEntry XgemvComplexSingle = { { "default", { { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 32, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-4570 CPU @ 3.20GHz "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-4590S CPU @ 3.00GHz "}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 128, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz "}, Params{ 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -131,10 +132,11 @@ const DatabaseEntry XgemvComplexSingle = { } }, { "SM6.1", { { Name{"GeForce GTX 1070 "}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1070 Ti "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "default", { { kDeviceNameDefault , Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xgemv/xgemv_64.hpp b/src/database/kernels/xgemv/xgemv_64.hpp index a8c68f81..c42f187a 100644 --- a/src/database/kernels/xgemv/xgemv_64.hpp +++ b/src/database/kernels/xgemv/xgemv_64.hpp @@ -62,6 +62,7 @@ const DatabaseEntry XgemvDouble = { { "default", { { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-4570 CPU @ 3.20GHz "}, Params{ 128, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-4590S CPU @ 3.00GHz "}, Params{ 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz "}, Params{ 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -114,6 +115,7 @@ const DatabaseEntry XgemvDouble = { } }, { "SM6.1", { { Name{"GeForce GTX 1070 "}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1070 Ti "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xgemv/xgemv_6464.hpp b/src/database/kernels/xgemv/xgemv_6464.hpp index c9842698..7b62b932 100644 --- a/src/database/kernels/xgemv/xgemv_6464.hpp +++ b/src/database/kernels/xgemv/xgemv_6464.hpp @@ -62,6 +62,7 @@ const DatabaseEntry XgemvComplexDouble = { { "default", { { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-4570 CPU @ 3.20GHz "}, Params{ 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-4590S CPU @ 3.00GHz "}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 128, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz "}, Params{ 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -97,6 +98,7 @@ const DatabaseEntry XgemvComplexDouble = { { kDeviceNameDefault , Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM6.1", { + { Name{"GeForce GTX 1070 Ti "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, diff --git a/src/database/kernels/xgemv_fast/xgemv_fast_32.hpp b/src/database/kernels/xgemv_fast/xgemv_fast_32.hpp index 4f1f1671..146bd466 100644 --- a/src/database/kernels/xgemv_fast/xgemv_fast_32.hpp +++ b/src/database/kernels/xgemv_fast/xgemv_fast_32.hpp @@ -71,6 +71,7 @@ const DatabaseEntry XgemvFastSingle = { { "default", { { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 1, 32, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-4570 CPU @ 3.20GHz "}, Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-4590S CPU @ 3.00GHz "}, Params{ 4, 256, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 4, 128, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 4, 32, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz "}, Params{ 1, 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -85,6 +86,7 @@ const DatabaseEntry XgemvFastSingle = { { "default", { { Name{"Intel(R) HD Graphics 530 "}, Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 2, 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics 6000 BroadWell U-Processor GT"}, Params{ 2, 128, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile "}, Params{ 4, 128, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics IvyBridge M GT2 "}, Params{ 1, 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 2, 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -138,6 +140,7 @@ const DatabaseEntry XgemvFastSingle = { } }, { "SM6.1", { { Name{"GeForce GTX 1070 "}, Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1070 Ti "}, Params{ 2, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xgemv_fast/xgemv_fast_3232.hpp b/src/database/kernels/xgemv_fast/xgemv_fast_3232.hpp index 92fc16fe..693fac4e 100644 --- a/src/database/kernels/xgemv_fast/xgemv_fast_3232.hpp +++ b/src/database/kernels/xgemv_fast/xgemv_fast_3232.hpp @@ -70,6 +70,7 @@ const DatabaseEntry XgemvFastComplexSingle = { { "default", { { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 2, 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-4570 CPU @ 3.20GHz "}, Params{ 4, 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-4590S CPU @ 3.00GHz "}, Params{ 4, 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 1, 128, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 2, 128, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz "}, Params{ 4, 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -116,6 +117,7 @@ const DatabaseEntry XgemvFastComplexSingle = { } }, { "SM6.1", { { Name{"GeForce GTX 1070 "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1070 Ti "}, Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, diff --git a/src/database/kernels/xgemv_fast/xgemv_fast_64.hpp b/src/database/kernels/xgemv_fast/xgemv_fast_64.hpp index f684c87b..ef129fd4 100644 --- a/src/database/kernels/xgemv_fast/xgemv_fast_64.hpp +++ b/src/database/kernels/xgemv_fast/xgemv_fast_64.hpp @@ -62,6 +62,7 @@ const DatabaseEntry XgemvFastDouble = { { "default", { { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 1, 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-4570 CPU @ 3.20GHz "}, Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-4590S CPU @ 3.00GHz "}, Params{ 4, 256, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 4, 128, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz "}, Params{ 1, 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -114,6 +115,7 @@ const DatabaseEntry XgemvFastDouble = { } }, { "SM6.1", { { Name{"GeForce GTX 1070 "}, Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1070 Ti "}, Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 1, 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xgemv_fast/xgemv_fast_6464.hpp b/src/database/kernels/xgemv_fast/xgemv_fast_6464.hpp index 56fe25d6..7cf8caa6 100644 --- a/src/database/kernels/xgemv_fast/xgemv_fast_6464.hpp +++ b/src/database/kernels/xgemv_fast/xgemv_fast_6464.hpp @@ -62,6 +62,7 @@ const DatabaseEntry XgemvFastComplexDouble = { { "default", { { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 4, 32, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-4570 CPU @ 3.20GHz "}, Params{ 4, 32, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-4590S CPU @ 3.00GHz "}, Params{ 4, 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 2, 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 1, 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz "}, Params{ 4, 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -93,6 +94,7 @@ const DatabaseEntry XgemvFastComplexDouble = { { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM6.1", { + { Name{"GeForce GTX 1070 Ti "}, Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, diff --git a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_32.hpp b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_32.hpp index 232cf2c4..42e7a36d 100644 --- a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_32.hpp +++ b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_32.hpp @@ -55,6 +55,7 @@ const DatabaseEntry XgemvFastRotSingle = { { "default", { { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 4, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-4570 CPU @ 3.20GHz "}, Params{ 8, 16, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-4590S CPU @ 3.00GHz "}, Params{ 8, 16, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 8, 128, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 4, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz "}, Params{ 8, 16, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -67,6 +68,7 @@ const DatabaseEntry XgemvFastRotSingle = { kDeviceTypeGPU, "Intel", { { "default", { { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 8, 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics 6000 BroadWell U-Processor GT"}, Params{ 8, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile "}, Params{ 4, 64, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics IvyBridge M GT2 "}, Params{ 2, 128, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 4, 64, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -101,6 +103,7 @@ const DatabaseEntry XgemvFastRotSingle = { { kDeviceNameDefault , Params{ 8, 16, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM6.1", { + { Name{"GeForce GTX 1070 Ti "}, Params{ 8, 32, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 8, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 8, 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 8, 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_3232.hpp b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_3232.hpp index 8c08d4d2..98d5cf6a 100644 --- a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_3232.hpp +++ b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_3232.hpp @@ -54,11 +54,12 @@ const DatabaseEntry XgemvFastRotComplexSingle = { { "default", { { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 4, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-4570 CPU @ 3.20GHz "}, Params{ 8, 16, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-4590S CPU @ 3.00GHz "}, Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 8, 32, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 4, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz "}, Params{ 4, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7-6770HQ CPU @ 2.60GHz "}, Params{ 8, 16, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 4, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 16, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, @@ -84,11 +85,12 @@ const DatabaseEntry XgemvFastRotComplexSingle = { { kDeviceNameDefault , Params{ 2, 32, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM6.1", { + { Name{"GeForce GTX 1070 Ti "}, Params{ 8, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 4, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 4, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "default", { - { kDeviceNameDefault , Params{ 2, 32, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 4, 32, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_64.hpp b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_64.hpp index 7f65bb3d..eabe795d 100644 --- a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_64.hpp +++ b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_64.hpp @@ -46,11 +46,12 @@ const DatabaseEntry XgemvFastRotDouble = { { "default", { { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 4, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-4570 CPU @ 3.20GHz "}, Params{ 8, 16, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-4590S CPU @ 3.00GHz "}, Params{ 8, 16, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 8, 16, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 4, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz "}, Params{ 8, 16, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7-6770HQ CPU @ 2.60GHz "}, Params{ 8, 16, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 8, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, @@ -79,6 +80,7 @@ const DatabaseEntry XgemvFastRotDouble = { { kDeviceNameDefault , Params{ 2, 32, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM6.1", { + { Name{"GeForce GTX 1070 Ti "}, Params{ 4, 32, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 8, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 8, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 8, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_6464.hpp b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_6464.hpp index 0408b60e..1d32eb73 100644 --- a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_6464.hpp +++ b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_6464.hpp @@ -46,6 +46,7 @@ const DatabaseEntry XgemvFastRotComplexDouble = { { "default", { { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 2, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-4570 CPU @ 3.20GHz "}, Params{ 8, 16, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-4590S CPU @ 3.00GHz "}, Params{ 8, 16, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 4, 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 2, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz "}, Params{ 8, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -65,11 +66,12 @@ const DatabaseEntry XgemvFastRotComplexDouble = { { kDeviceNameDefault , Params{ 1, 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM6.1", { + { Name{"GeForce GTX 1070 Ti "}, Params{ 8, 32, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 8, 32, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 8, 32, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "default", { - { kDeviceNameDefault , Params{ 2, 32, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 4, 32, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/xger/xger_32.hpp b/src/database/kernels/xger/xger_32.hpp index e4d1ec39..43f64f13 100644 --- a/src/database/kernels/xger/xger_32.hpp +++ b/src/database/kernels/xger/xger_32.hpp @@ -71,6 +71,7 @@ const DatabaseEntry XgerSingle = { { "default", { { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 32, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-4570 CPU @ 3.20GHz "}, Params{ 256, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-4590S CPU @ 3.00GHz "}, Params{ 512, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 128, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 256, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 256, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -85,6 +86,7 @@ const DatabaseEntry XgerSingle = { { "default", { { Name{"Intel(R) HD Graphics 530 "}, Params{ 32, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 256, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) HD Graphics 6000 BroadWell U-Processor GT"}, Params{ 16, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics 620 "}, Params{ 32, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile "}, Params{ 128, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) HD Graphics IvyBridge M GT2 "}, Params{ 64, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -126,6 +128,7 @@ const DatabaseEntry XgerSingle = { } }, { "SM6.1", { { Name{"GeForce GTX 1070 "}, Params{ 512, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1070 Ti "}, Params{ 16, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 16, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 64, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 512, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xger/xger_3232.hpp b/src/database/kernels/xger/xger_3232.hpp index 2a9db386..123fc4fa 100644 --- a/src/database/kernels/xger/xger_3232.hpp +++ b/src/database/kernels/xger/xger_3232.hpp @@ -70,6 +70,7 @@ const DatabaseEntry XgerComplexSingle = { { "default", { { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 128, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-4570 CPU @ 3.20GHz "}, Params{ 512, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-4590S CPU @ 3.00GHz "}, Params{ 512, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 256, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 256, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 256, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -123,6 +124,7 @@ const DatabaseEntry XgerComplexSingle = { } }, { "SM6.1", { { Name{"GeForce GTX 1070 "}, Params{ 16, 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1070 Ti "}, Params{ 8, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 32, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 128, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 32, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xger/xger_64.hpp b/src/database/kernels/xger/xger_64.hpp index 2bc0805b..05a33a28 100644 --- a/src/database/kernels/xger/xger_64.hpp +++ b/src/database/kernels/xger/xger_64.hpp @@ -62,12 +62,13 @@ const DatabaseEntry XgerDouble = { { "default", { { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 256, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-4570 CPU @ 3.20GHz "}, Params{ 128, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-4590S CPU @ 3.00GHz "}, Params{ 256, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 512, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 256, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 256, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz "}, Params{ 512, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7-6770HQ CPU @ 2.60GHz "}, Params{ 256, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 256, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, @@ -110,6 +111,7 @@ const DatabaseEntry XgerDouble = { } }, { "SM6.1", { { Name{"GeForce GTX 1070 "}, Params{ 32, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1070 Ti "}, Params{ 8, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 32, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 512, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 32, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xger/xger_6464.hpp b/src/database/kernels/xger/xger_6464.hpp index 01adc7cc..52b51256 100644 --- a/src/database/kernels/xger/xger_6464.hpp +++ b/src/database/kernels/xger/xger_6464.hpp @@ -62,12 +62,13 @@ const DatabaseEntry XgerComplexDouble = { { "default", { { Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 128, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-4570 CPU @ 3.20GHz "}, Params{ 512, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Intel(R) Core(TM) i5-4590S CPU @ 3.00GHz "}, Params{ 128, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 512, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz "}, Params{ 256, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz "}, Params{ 512, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz "}, Params{ 256, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Intel(R) Core(TM) i7-6770HQ CPU @ 2.60GHz "}, Params{ 256, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 128, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, @@ -110,6 +111,7 @@ const DatabaseEntry XgerComplexDouble = { } }, { "SM6.1", { { Name{"GeForce GTX 1070 "}, Params{ 8, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"GeForce GTX 1070 Ti "}, Params{ 4, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 4, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 4, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/kernels/common.opencl b/src/kernels/common.opencl index 4a476a8b..0ad38919 100644 --- a/src/kernels/common.opencl +++ b/src/kernels/common.opencl @@ -260,7 +260,7 @@ R"( // Staggered/shuffled group indices to avoid partition camping (AMD GPUs). Formula's are taken from: // http://docs.nvidia.com/cuda/samples/6_Advanced/transpose/doc/MatrixTranspose.pdf // More details: https://github.com/CNugteren/CLBlast/issues/53 -#if USE_STAGGERED_INDICES == 1 +#if USE_STAGGERED_INDICES == 1 && GEMMK == 0 INLINE_FUNC int GetGroupIDFlat() { return get_group_id(0) + get_num_groups(0) * get_group_id(1); } diff --git a/src/kernels/level3/xgemm_part1.opencl b/src/kernels/level3/xgemm_part1.opencl index 99d64c91..3cfc5dfb 100644 --- a/src/kernels/level3/xgemm_part1.opencl +++ b/src/kernels/level3/xgemm_part1.opencl @@ -114,13 +114,29 @@ R"( #define GLOBAL_MEM_FENCE 0 // Global synchronisation barrier for potential better performance #endif -// Intel subgroups (https://www.khronos.org/registry/OpenCL/extensions/intel/cl_intel_subgroups.txt) +#ifndef SUBGROUP_SHUFFLING_NVIDIA_PRE_VOLTA + #define SUBGROUP_SHUFFLING_NVIDIA_PRE_VOLTA 0 +#endif +#ifndef SUBGROUP_SHUFFLING_NVIDIA_POST_VOLTA + #define SUBGROUP_SHUFFLING_NVIDIA_POST_VOLTA 0 +#endif +#ifndef SUBGROUP_SHUFFLING_INTEL + #define SUBGROUP_SHUFFLING_INTEL 0 +#endif #ifndef USE_SUBGROUP_SHUFFLING #define USE_SUBGROUP_SHUFFLING 0 // Optionally enables subgroup shuffling for Intel GPUs #endif -#if USE_SUBGROUP_SHUFFLING == 1 + +// Intel subgroups (https://www.khronos.org/registry/OpenCL/extensions/intel/cl_intel_subgroups.txt) +#if USE_SUBGROUP_SHUFFLING == 1 && SUBGROUP_SHUFFLING_INTEL #define SUBGROUP_SIZE 8 // Assumes subgroup size is always 8 on Intel GPUs #endif + +// NVIDIA warps as subgroups using inline PTX (https://docs.nvidia.com/cuda/inline-ptx-assembly/index.html) +#if USE_SUBGROUP_SHUFFLING == 1 && (SUBGROUP_SHUFFLING_NVIDIA_PRE_VOLTA || SUBGROUP_SHUFFLING_NVIDIA_POST_VOLTA) + #define SUBGROUP_SIZE 32 // Assumes subgroup size is always 32 on NVIDIA GPUs +#endif + #if NWI != SUBGROUP_SIZE || MDIMC < SUBGROUP_SIZE #undef USE_SUBGROUP_SHUFFLING #define USE_SUBGROUP_SHUFFLING 0 // Disables subgroups in case the assumptions don't hold diff --git a/src/kernels/level3/xgemm_part3.opencl b/src/kernels/level3/xgemm_part3.opencl index c3920cb5..90de0b3b 100644 --- a/src/kernels/level3/xgemm_part3.opencl +++ b/src/kernels/level3/xgemm_part3.opencl @@ -17,6 +17,44 @@ R"( // ================================================================================================= +// A common interface for subgroup functions + +#if USE_SUBGROUP_SHUFFLING == 1 + +INLINE_FUNC int clblast_get_sub_group_local_id() { + + // Intel extension + #if SUBGROUP_SHUFFLING_INTEL == 1 + return get_sub_group_local_id(); + + // Nvidia inline PTX + #elif SUBGROUP_SHUFFLING_NVIDIA_PRE_VOLTA == 1 || SUBGROUP_SHUFFLING_NVIDIA_POST_VOLTA == 1 + int ret; + asm volatile("mov.u32 %0, %%laneid;" : "=r"(ret) ); + return ret; + #endif +} + +INLINE_FUNC realN clblast_sub_group_shuffle(realN reg, int src) { + + // Intel extension + #if SUBGROUP_SHUFFLING_INTEL == 1 + return intel_sub_group_shuffle(reg, src); + + // Nvidia inline PTX + // Volta and later requires .sync shuffle instructions with an extra mask arg + #elif SUBGROUP_SHUFFLING_NVIDIA_PRE_VOLTA == 1 || SUBGROUP_SHUFFLING_NVIDIA_POST_VOLTA == 1 + realN ret; + #if SUBGROUP_SHUFFLING_NVIDIA_POST_VOLTA == 1 + asm volatile("shfl.sync.idx.b32 %0, %1, %2, 0x1f, 0xffffffff;" : "=f"(ret): "f"(reg), "r"(src)); + #else + asm volatile("shfl.idx.b32 %0, %1, %2, 0x1f;" : "=f"(ret): "f"(reg), "r"(src)); + #endif + return ret; + #endif +} +#endif + // Main body of the matrix-multiplication algorithm. It calls various (inlined) functions. INLINE_FUNC void XgemmBody(const int kSizeM, const int kSizeN, const int kSizeK, const __global realM* restrict agm, const __global realN* restrict bgm, @@ -53,8 +91,8 @@ INLINE_FUNC void XgemmBody(const int kSizeM, const int kSizeN, const int kSizeK, #if GEMMK == 1 const __global real* restrict a_ptr = (const __global real* restrict) &agm[0]; const __global real* restrict b_ptr = (const __global real* restrict) &bgm[0]; - const int tid_x = get_global_id(0); - const int tid_y = get_global_id(1); + const int tid_x = get_local_id(0) + MDIMC * GetGroupID0(); + const int tid_y = get_local_id(1) + NDIMC * GetGroupID1(); #endif // Combined thread identifier (volatile to disable caching) @@ -130,7 +168,7 @@ INLINE_FUNC void XgemmBody(const int kSizeM, const int kSizeN, const int kSizeK, #elif GEMMK == 1 // Loads data: 2D global --> 2D private (matrix A). Partly, shuffled later among subgroups #if USE_SUBGROUP_SHUFFLING == 1 - const int _ni = get_sub_group_local_id(); + const int _ni = clblast_get_sub_group_local_id(); #pragma unroll for (int _ki = 0; _ki < KREG/VWN; _ki += 1) { apm[_ki] = GlobalToPrivateA2D(a_ptr, tid_y, _ni, kSizeK, idk, _ki); @@ -202,7 +240,7 @@ INLINE_FUNC void XgemmBody(const int kSizeM, const int kSizeN, const int kSizeK, for (int _ki = 0; _ki < KREG/VWN; _ki += 1) { const int index = _ni * (MWI/VWM) + _mi; #if USE_SUBGROUP_SHUFFLING == 1 - const realN aval = intel_sub_group_shuffle(apm[_ki], _ni); + const realN aval = clblast_sub_group_shuffle(apm[_ki], _ni); #else const realN aval = apm[_ni * (KREG/VWN) + _ki]; #endif diff --git a/src/routine.cpp b/src/routine.cpp index 4caa4d7b..2df6d563 100644 --- a/src/routine.cpp +++ b/src/routine.cpp @@ -96,7 +96,7 @@ void Routine::InitProgram(std::initializer_list<const char *> source) { auto binary = BinaryCache::Instance().Get(BinaryKeyRef{platform_id, precision_, routine_info, device_name }, &has_binary); if (has_binary) { - program_ = std::make_shared<Program>(Program(device_, context_, binary)); + program_ = std::make_shared<Program>(device_, context_, binary); program_->Build(device_, options); ProgramCache::Instance().Store(ProgramKey{ context_(), device_(), precision_, routine_info }, std::shared_ptr<Program>{program_}); diff --git a/src/tuning/tuning.cpp b/src/tuning/tuning.cpp index 216f4b31..822f8851 100644 --- a/src/tuning/tuning.cpp +++ b/src/tuning/tuning.cpp @@ -342,8 +342,17 @@ void Tuner(int argc, char* argv[], const int V, const auto best_time_ms = best_configuration->score; if (best_time_ms == 0.0) { return; } - // Also prints the performance of the best-case in terms of GB/s or GFLOPS + // Computes and prints some other statistics + auto average_ms = 0.0; + for (const auto result : results) { average_ms += result.score; } + average_ms /= results.size(); printf("\n"); + printf("* Got average result of %.2lf ms", average_ms); + printf(": %.1lf %s\n", settings.metric_amount / (average_ms * 1.0e6), + settings.performance_unit.c_str()); + + + // Also prints the performance of the best-case in terms of GB/s or GFLOPS printf("* Found best result %.2lf ms", best_time_ms); printf(": %.1lf %s\n", settings.metric_amount / (best_time_ms * 1.0e6), settings.performance_unit.c_str()); diff --git a/src/utilities/compile.cpp b/src/utilities/compile.cpp index 05c29944..835f54b4 100644 --- a/src/utilities/compile.cpp +++ b/src/utilities/compile.cpp @@ -61,8 +61,22 @@ std::shared_ptr<Program> CompileFromSource( // For Intel GPUs with subgroup support, use subgroup shuffling. if (device.IsGPU() && device.HasExtension(kKhronosIntelSubgroups)) { header_string += "#define USE_SUBGROUP_SHUFFLING 1\n"; + header_string += "#define SUBGROUP_SHUFFLING_INTEL 1\n"; } + // For NVIDIA GPUs, inline PTX can provide subgroup support + if (device.IsGPU() && device.IsNVIDIA() && precision == Precision::kSingle) { + header_string += "#define USE_SUBGROUP_SHUFFLING 1\n"; + + // Nvidia needs to check pre or post volta due to new shuffle commands + if (device.IsPostNVIDIAVolta()) { + header_string += "#define SUBGROUP_SHUFFLING_NVIDIA_POST_VOLTA 1\n"; + } + else { + header_string += "#define SUBGROUP_SHUFFLING_NVIDIA_PRE_VOLTA 1\n"; + } + } + // Optionally adds a translation header from OpenCL kernels to CUDA kernels #ifdef CUDA_API header_string += diff --git a/src/utilities/device_mapping.hpp b/src/utilities/device_mapping.hpp index 7fdc04a0..c814622f 100644 --- a/src/utilities/device_mapping.hpp +++ b/src/utilities/device_mapping.hpp @@ -43,6 +43,11 @@ const std::unordered_map<std::string, std::string> kDeviceNames { // Empty }; +// Things to remove from device names (low-level) +const std::vector<std::string> kDeviceRemovals { + "pthread-" +}; + // ================================================================================================= } // namespace device_mapping } // namespace clblast diff --git a/src/utilities/utilities.cpp b/src/utilities/utilities.cpp index 2008b6a3..a8fdaa19 100644 --- a/src/utilities/utilities.cpp +++ b/src/utilities/utilities.cpp @@ -477,6 +477,14 @@ std::string GetDeviceName(const Device& device) { for (auto &find_and_replace : device_mapping::kDeviceNames) { // replacing to common names if (device_name == find_and_replace.first) { device_name = find_and_replace.second; } } + + for (auto &removal : device_mapping::kDeviceRemovals) { // removing certain things + if (device_name.find(removal) != std::string::npos) { + auto start_position_to_erase = device_name.find(removal); + device_name.erase(start_position_to_erase, removal.length()); + } + } + return device_name; } |