diff options
author | CNugteren <web@cedricnugteren.nl> | 2015-05-30 12:30:43 +0200 |
---|---|---|
committer | CNugteren <web@cedricnugteren.nl> | 2015-05-30 12:30:43 +0200 |
commit | bc5a341dfe591946e925db315fc7d8c0c25c2938 (patch) | |
tree | b216ab5eee4863e3807d92b5ddd19fa22197ed22 /include/internal/database | |
parent | c7b054ea6747039f4405fd93da6e924f3e5c7f4b (diff) |
Initial commit of preview version
Diffstat (limited to 'include/internal/database')
-rw-r--r-- | include/internal/database/copy.h | 130 | ||||
-rw-r--r-- | include/internal/database/pad.h | 130 | ||||
-rw-r--r-- | include/internal/database/padtranspose.h | 130 | ||||
-rw-r--r-- | include/internal/database/transpose.h | 130 | ||||
-rw-r--r-- | include/internal/database/xaxpy.h | 129 | ||||
-rw-r--r-- | include/internal/database/xgemm.h | 133 |
6 files changed, 782 insertions, 0 deletions
diff --git a/include/internal/database/copy.h b/include/internal/database/copy.h new file mode 100644 index 00000000..b9335fc9 --- /dev/null +++ b/include/internal/database/copy.h @@ -0,0 +1,130 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file populates the database with best-found tuning parameters for the Copy kernels. +// +// ================================================================================================= + +namespace clblast { +// ================================================================================================= + +const Database::DatabaseEntry Database::CopySingle = { + "Copy", Precision::kSingle, { + { // NVIDIA GPUs + CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", { + { "GeForce GTX 480", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",2} } }, + { "Tesla K20m", { {"COPY_DIMX",8}, {"COPY_DIMY",16}, {"COPY_WPT",2}, {"COPY_VW",4} } }, + { "Tesla K40m", { {"COPY_DIMX",16}, {"COPY_DIMY",16}, {"COPY_WPT",4}, {"COPY_VW",4} } }, + } + }, + { // AMD GPUs + CL_DEVICE_TYPE_GPU, "AMD", { + { "Tahiti", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_WPT",4}, {"COPY_VW",2} } }, + } + }, + { // Intel GPUs + CL_DEVICE_TYPE_GPU, "Intel", { + { "Iris", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",4} } }, + } + }, + { // Default + CL_DEVICE_TYPE_ALL, kDefault, { + { kDefault, { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",1} } }, + } + }, + } +}; + +// ================================================================================================= + +const Database::DatabaseEntry Database::CopyDouble = { + "Copy", Precision::kDouble, { + { // NVIDIA GPUs + CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", { + { "GeForce GTX 480", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",1} } }, + { "Tesla K20m", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",2} } }, + { "Tesla K40m", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",2} } }, + } + }, + { // AMD GPUs + CL_DEVICE_TYPE_GPU, "AMD", { + { "Tahiti", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_WPT",2}, {"COPY_VW",4} } }, + } + }, + { // Intel GPUs + CL_DEVICE_TYPE_GPU, "Intel", { + } + }, + { // Default + CL_DEVICE_TYPE_ALL, kDefault, { + { kDefault, { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",1} } }, + } + }, + } +}; + +// ================================================================================================= + +const Database::DatabaseEntry Database::CopyComplexSingle = { + "Copy", Precision::kComplexSingle, { + { // NVIDIA GPUs + CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", { + { "GeForce GTX 480", { {"COPY_DIMX",16}, {"COPY_DIMY",16}, {"COPY_WPT",1}, {"COPY_VW",1} } }, + { "Tesla K20m", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_WPT",2}, {"COPY_VW",1} } }, + { "Tesla K40m", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",1} } }, + } + }, + { // AMD GPUs + CL_DEVICE_TYPE_GPU, "AMD", { + { "Tahiti", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",1} } }, + } + }, + { // Intel GPUs + CL_DEVICE_TYPE_GPU, "Intel", { + { "Iris", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",1} } }, + } + }, + { // Default + CL_DEVICE_TYPE_ALL, kDefault, { + { kDefault, { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",1} } }, + } + }, + } +}; + +// ================================================================================================= + +const Database::DatabaseEntry Database::CopyComplexDouble = { + "Copy", Precision::kComplexDouble, { + { // NVIDIA GPUs + CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", { + { "GeForce GTX 480", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",1} } }, + { "Tesla K20m", { {"COPY_DIMX",8}, {"COPY_DIMY",32}, {"COPY_WPT",1}, {"COPY_VW",1} } }, + { "Tesla K40m", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",1} } }, + } + }, + { // AMD GPUs + CL_DEVICE_TYPE_GPU, "AMD", { + { "Tahiti", { {"COPY_DIMX",8}, {"COPY_DIMY",32}, {"COPY_WPT",4}, {"COPY_VW",2} } }, + } + }, + { // Intel GPUs + CL_DEVICE_TYPE_GPU, "Intel", { + } + }, + { // Default + CL_DEVICE_TYPE_ALL, kDefault, { + { kDefault, { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",1} } }, + } + }, + } +}; + +// ================================================================================================= +} // namespace clblast diff --git a/include/internal/database/pad.h b/include/internal/database/pad.h new file mode 100644 index 00000000..5af75308 --- /dev/null +++ b/include/internal/database/pad.h @@ -0,0 +1,130 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file populates the database with best-found tuning parameters for the Pad kernels. +// +// ================================================================================================= + +namespace clblast { +// ================================================================================================= + +const Database::DatabaseEntry Database::PadSingle = { + "Pad", Precision::kSingle, { + { // NVIDIA GPUs + CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", { + { "GeForce GTX 480", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",4} } }, + { "Tesla K20m", { {"PAD_DIMX",16}, {"PAD_DIMY",32}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, + { "Tesla K40m", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, + } + }, + { // AMD GPUs + CL_DEVICE_TYPE_GPU, "AMD", { + { "Tahiti", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, + } + }, + { // Intel GPUs + CL_DEVICE_TYPE_GPU, "Intel", { + { "Iris", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, + } + }, + { // Default + CL_DEVICE_TYPE_ALL, kDefault, { + { kDefault, { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + } + }, + } +}; + +// ================================================================================================= + +const Database::DatabaseEntry Database::PadDouble = { + "Pad", Precision::kDouble, { + { // NVIDIA GPUs + CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", { + { "GeForce GTX 480", { {"PAD_DIMX",16}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "Tesla K20m", { {"PAD_DIMX",16}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "Tesla K40m", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + } + }, + { // AMD GPUs + CL_DEVICE_TYPE_GPU, "AMD", { + { "Tahiti", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, + } + }, + { // Intel GPUs + CL_DEVICE_TYPE_GPU, "Intel", { + } + }, + { // Default + CL_DEVICE_TYPE_ALL, kDefault, { + { kDefault, { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + } + }, + } +}; + +// ================================================================================================= + +const Database::DatabaseEntry Database::PadComplexSingle = { + "Pad", Precision::kComplexSingle, { + { // NVIDIA GPUs + CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", { + { "GeForce GTX 480", { {"PAD_DIMX",16}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "Tesla K20m", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, + { "Tesla K40m", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + } + }, + { // AMD GPUs + CL_DEVICE_TYPE_GPU, "AMD", { + { "Tahiti", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + } + }, + { // Intel GPUs + CL_DEVICE_TYPE_GPU, "Intel", { + { "Iris", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + } + }, + { // Default + CL_DEVICE_TYPE_ALL, kDefault, { + { kDefault, { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + } + }, + } +}; + +// ================================================================================================= + +const Database::DatabaseEntry Database::PadComplexDouble = { + "Pad", Precision::kComplexDouble, { + { // NVIDIA GPUs + CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", { + { "GeForce GTX 480", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "Tesla K20m", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "Tesla K40m", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + } + }, + { // AMD GPUs + CL_DEVICE_TYPE_GPU, "AMD", { + { "Tahiti", { {"PAD_DIMX",8}, {"PAD_DIMY",16}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, + } + }, + { // Intel GPUs + CL_DEVICE_TYPE_GPU, "Intel", { + } + }, + { // Default + CL_DEVICE_TYPE_ALL, kDefault, { + { kDefault, { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + } + }, + } +}; + +// ================================================================================================= +} // namespace clblast diff --git a/include/internal/database/padtranspose.h b/include/internal/database/padtranspose.h new file mode 100644 index 00000000..f1127d60 --- /dev/null +++ b/include/internal/database/padtranspose.h @@ -0,0 +1,130 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file populates the database with best-found tuning parameters for the PadTranspose kernels. +// +// ================================================================================================= + +namespace clblast { +// ================================================================================================= + +const Database::DatabaseEntry Database::PadTraSingle = { + "PadTranspose", Precision::kSingle, { + { // NVIDIA GPUs + CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", { + { "GeForce GTX 480", { {"PADTRA_TILE",16}, {"PADTRA_WPT",2}, {"PADTRA_PAD",1} } }, + { "Tesla K20m", { {"PADTRA_TILE",16}, {"PADTRA_WPT",2}, {"PADTRA_PAD",1} } }, + { "Tesla K40m", { {"PADTRA_TILE",32}, {"PADTRA_WPT",2}, {"PADTRA_PAD",1} } }, + } + }, + { // AMD GPUs + CL_DEVICE_TYPE_GPU, "AMD", { + { "Tahiti", { {"PADTRA_TILE",16}, {"PADTRA_WPT",4}, {"PADTRA_PAD",0} } }, + } + }, + { // Intel GPUs + CL_DEVICE_TYPE_GPU, "Intel", { + { "Iris", { {"PADTRA_TILE",16}, {"PADTRA_WPT",2}, {"PADTRA_PAD",0} } }, + } + }, + { // Default + CL_DEVICE_TYPE_ALL, kDefault, { + { kDefault, { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",0} } }, + } + }, + } +}; + +// ================================================================================================= + +const Database::DatabaseEntry Database::PadTraDouble = { + "PadTranspose", Precision::kDouble, { + { // NVIDIA GPUs + CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", { + { "GeForce GTX 480", { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",1} } }, + { "Tesla K20m", { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",1} } }, + { "Tesla K40m", { {"PADTRA_TILE",16}, {"PADTRA_WPT",2}, {"PADTRA_PAD",1} } }, + } + }, + { // AMD GPUs + CL_DEVICE_TYPE_GPU, "AMD", { + { "Tahiti", { {"PADTRA_TILE",8}, {"PADTRA_WPT",4}, {"PADTRA_PAD",0} } }, + } + }, + { // Intel GPUs + CL_DEVICE_TYPE_GPU, "Intel", { + } + }, + { // Default + CL_DEVICE_TYPE_ALL, kDefault, { + { kDefault, { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",0} } }, + } + }, + } +}; + +// ================================================================================================= + +const Database::DatabaseEntry Database::PadTraComplexSingle = { + "PadTranspose", Precision::kComplexSingle, { + { // NVIDIA GPUs + CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", { + { "GeForce GTX 480", { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",1} } }, + { "Tesla K20m", { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",1} } }, + { "Tesla K40m", { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",0} } }, + } + }, + { // AMD GPUs + CL_DEVICE_TYPE_GPU, "AMD", { + { "Tahiti", { {"PADTRA_TILE",16}, {"PADTRA_WPT",2}, {"PADTRA_PAD",0} } }, + } + }, + { // Intel GPUs + CL_DEVICE_TYPE_GPU, "Intel", { + { "Iris", { {"PADTRA_TILE",16}, {"PADTRA_WPT",2}, {"PADTRA_PAD",0} } }, + } + }, + { // Default + CL_DEVICE_TYPE_ALL, kDefault, { + { kDefault, { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",0} } }, + } + }, + } +}; + +// ================================================================================================= + +const Database::DatabaseEntry Database::PadTraComplexDouble = { + "PadTranspose", Precision::kComplexDouble, { + { // NVIDIA GPUs + CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", { + { "GeForce GTX 480", { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",1} } }, + { "Tesla K20m", { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",1} } }, + { "Tesla K40m", { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",1} } }, + } + }, + { // AMD GPUs + CL_DEVICE_TYPE_GPU, "AMD", { + { "Tahiti", { {"PADTRA_TILE",8}, {"PADTRA_WPT",2}, {"PADTRA_PAD",1} } }, + } + }, + { // Intel GPUs + CL_DEVICE_TYPE_GPU, "Intel", { + } + }, + { // Default + CL_DEVICE_TYPE_ALL, kDefault, { + { kDefault, { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",0} } }, + } + }, + } +}; + +// ================================================================================================= +} // namespace clblast diff --git a/include/internal/database/transpose.h b/include/internal/database/transpose.h new file mode 100644 index 00000000..0814eb8a --- /dev/null +++ b/include/internal/database/transpose.h @@ -0,0 +1,130 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file populates the database with best-found tuning parameters for the Transpose kernels. +// +// ================================================================================================= + +namespace clblast { +// ================================================================================================= + +const Database::DatabaseEntry Database::TraSingle = { + "Transpose", Precision::kSingle, { + { // NVIDIA GPUs + CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", { + { "GeForce GTX 480", { {"TRA_DIM",16}, {"TRA_WPT",2}, {"TRA_PAD",1} } }, + { "Tesla K20m", { {"TRA_DIM",16}, {"TRA_WPT",2}, {"TRA_PAD",1} } }, + { "Tesla K40m", { {"TRA_DIM",16}, {"TRA_WPT",2}, {"TRA_PAD",1} } }, + } + }, + { // AMD GPUs + CL_DEVICE_TYPE_GPU, "AMD", { + { "Tahiti", { {"TRA_DIM",8}, {"TRA_WPT",8}, {"TRA_PAD",0} } }, + } + }, + { // Intel GPUs + CL_DEVICE_TYPE_GPU, "Intel", { + { "Iris", { {"TRA_DIM",8}, {"TRA_WPT",4}, {"TRA_PAD",0} } }, + } + }, + { // Default + CL_DEVICE_TYPE_ALL, kDefault, { + { kDefault, { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",0} } }, + } + }, + } +}; + +// ================================================================================================= + +const Database::DatabaseEntry Database::TraDouble = { + "Transpose", Precision::kDouble, { + { // NVIDIA GPUs + CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", { + { "GeForce GTX 480", { {"TRA_DIM",8}, {"TRA_WPT",2}, {"TRA_PAD",1} } }, + { "Tesla K20m", { {"TRA_DIM",16}, {"TRA_WPT",2}, {"TRA_PAD",1} } }, + { "Tesla K40m", { {"TRA_DIM",16}, {"TRA_WPT",2}, {"TRA_PAD",1} } }, + } + }, + { // AMD GPUs + CL_DEVICE_TYPE_GPU, "AMD", { + { "Tahiti", { {"TRA_DIM",8}, {"TRA_WPT",8}, {"TRA_PAD",0} } }, + } + }, + { // Intel GPUs + CL_DEVICE_TYPE_GPU, "Intel", { + } + }, + { // Default + CL_DEVICE_TYPE_ALL, kDefault, { + { kDefault, { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",0} } }, + } + }, + } +}; + +// ================================================================================================= + +const Database::DatabaseEntry Database::TraComplexSingle = { + "Transpose", Precision::kComplexSingle, { + { // NVIDIA GPUs + CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", { + { "GeForce GTX 480", { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",1} } }, + { "Tesla K20m", { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",0} } }, + { "Tesla K40m", { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",1} } }, + } + }, + { // AMD GPUs + CL_DEVICE_TYPE_GPU, "AMD", { + { "Tahiti", { {"TRA_DIM",8}, {"TRA_WPT",2}, {"TRA_PAD",1} } }, + } + }, + { // Intel GPUs + CL_DEVICE_TYPE_GPU, "Intel", { + { "Iris", { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",1} } }, + } + }, + { // Default + CL_DEVICE_TYPE_ALL, kDefault, { + { kDefault, { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",0} } }, + } + }, + } +}; + +// ================================================================================================= + +const Database::DatabaseEntry Database::TraComplexDouble = { + "Transpose", Precision::kComplexDouble, { + { // NVIDIA GPUs + CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", { + { "GeForce GTX 480", { {"TRA_DIM",8}, {"TRA_WPT",1}, {"TRA_PAD",1} } }, + { "Tesla K20m", { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",1} } }, + { "Tesla K40m", { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",1} } }, + } + }, + { // AMD GPUs + CL_DEVICE_TYPE_GPU, "AMD", { + { "Tahiti", { {"TRA_DIM",8}, {"TRA_WPT",1}, {"TRA_PAD",0} } }, + } + }, + { // Intel GPUs + CL_DEVICE_TYPE_GPU, "Intel", { + } + }, + { // Default + CL_DEVICE_TYPE_ALL, kDefault, { + { kDefault, { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",0} } }, + } + }, + } +}; + +// ================================================================================================= +} // namespace clblast diff --git a/include/internal/database/xaxpy.h b/include/internal/database/xaxpy.h new file mode 100644 index 00000000..c331945a --- /dev/null +++ b/include/internal/database/xaxpy.h @@ -0,0 +1,129 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file populates the database with best-found tuning parameters for the Xaxpy kernels. +// +// ================================================================================================= + +namespace clblast { +// ================================================================================================= + +const Database::DatabaseEntry Database::XaxpySingle = { + "Xaxpy", Precision::kSingle, { + { // NVIDIA GPUs + CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", { + { "GeForce GTX 480", { {"WGS",128}, {"WPT",1}, {"VW",2} } }, + { "Tesla K20m", { {"WGS",128}, {"WPT",2}, {"VW",2} } }, + { "Tesla K40m", { {"WGS",128}, {"WPT",1}, {"VW",4} } }, + } + }, + { // AMD GPUs + CL_DEVICE_TYPE_GPU, "AMD", { + { "Tahiti", { {"WGS",64}, {"WPT",1}, {"VW",2} } }, + } + }, + { // Intel GPUs + CL_DEVICE_TYPE_GPU, "Intel", { + { "Iris", { {"WGS",512}, {"WPT",1}, {"VW",1} } }, + } + }, + { // Default + CL_DEVICE_TYPE_ALL, kDefault, { + { kDefault, { {"WGS",128}, {"WPT",1}, {"VW",1} } }, + } + }, + } +}; + +// ================================================================================================= + +const Database::DatabaseEntry Database::XaxpyDouble = { + "Xaxpy", Precision::kDouble, { + { // NVIDIA GPUs + CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", { + { "GeForce GTX 480", { {"WGS",128}, {"WPT",1}, {"VW",1} } }, + { "Tesla K20m", { {"WGS",512}, {"WPT",1}, {"VW",2} } }, + { "Tesla K40m", { {"WGS",64}, {"WPT",1}, {"VW",2} } }, + } + }, + { // AMD GPUs + CL_DEVICE_TYPE_GPU, "AMD", { + { "Tahiti", { {"WGS",256}, {"WPT",1}, {"VW",1} } }, + } + }, + { // Intel GPUs + CL_DEVICE_TYPE_GPU, "Intel", { + } + }, + { // Default + CL_DEVICE_TYPE_ALL, kDefault, { + { kDefault, { {"WGS",128}, {"WPT",1}, {"VW",1} } }, + } + }, + } +}; +// ================================================================================================= + +const Database::DatabaseEntry Database::XaxpyComplexSingle = { + "Xaxpy", Precision::kComplexSingle, { + { // NVIDIA GPUs + CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", { + { "GeForce GTX 480", { {"WGS",256}, {"WPT",1}, {"VW",1} } }, + { "Tesla K20m", { {"WGS",128}, {"WPT",1}, {"VW",1} } }, + { "Tesla K40m", { {"WGS",128}, {"WPT",2}, {"VW",1} } }, + } + }, + { // AMD GPUs + CL_DEVICE_TYPE_GPU, "AMD", { + { "Tahiti", { {"WGS",64}, {"WPT",1}, {"VW",1} } }, + } + }, + { // Intel GPUs + CL_DEVICE_TYPE_GPU, "Intel", { + { "Iris", { {"WGS",256}, {"WPT",1}, {"VW",1} } }, + } + }, + { // Default + CL_DEVICE_TYPE_ALL, kDefault, { + { kDefault, { {"WGS",128}, {"WPT",1}, {"VW",1} } }, + } + }, + } +}; + +// ================================================================================================= + +const Database::DatabaseEntry Database::XaxpyComplexDouble = { + "Xaxpy", Precision::kComplexDouble, { + { // NVIDIA GPUs + CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", { + { "GeForce GTX 480", { {"WGS",128}, {"WPT",2}, {"VW",1} } }, + { "Tesla K20m", { {"WGS",256}, {"WPT",1}, {"VW",1} } }, + { "Tesla K40m", { {"WGS",64}, {"WPT",2}, {"VW",1} } }, + } + }, + { // AMD GPUs + CL_DEVICE_TYPE_GPU, "AMD", { + { "Tahiti", { {"WGS",64}, {"WPT",1}, {"VW",1} } }, + } + }, + { // Intel GPUs + CL_DEVICE_TYPE_GPU, "Intel", { + } + }, + { // Default + CL_DEVICE_TYPE_ALL, kDefault, { + { kDefault, { {"WGS",128}, {"WPT",1}, {"VW",1} } }, + } + }, + } +}; + +// ================================================================================================= +} // namespace clblast diff --git a/include/internal/database/xgemm.h b/include/internal/database/xgemm.h new file mode 100644 index 00000000..edf41e12 --- /dev/null +++ b/include/internal/database/xgemm.h @@ -0,0 +1,133 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file populates the database with best-found tuning parameters for the Xgemm kernels. +// +// ================================================================================================= + +namespace clblast { +// ================================================================================================= + +const Database::DatabaseEntry Database::XgemmSingle = { + "Xgemm", Precision::kSingle, { + { // NVIDIA GPUs + CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", { + { "GeForce GTX 480", { {"MWG",128}, {"NWG",64}, {"KWG",32}, {"MDIMC",16}, {"NDIMC",16}, {"MDIMA",16}, {"NDIMB",16}, {"KWI",2}, {"VWM",2}, {"VWN",2}, {"STRM",1}, {"STRN",0}, {"SA",1}, {"SB",1} } }, + { "Tesla K20m", { {"MWG",128}, {"NWG",64}, {"KWG",16}, {"MDIMC",16}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",32}, {"KWI",2}, {"VWM",4}, {"VWN",1}, {"STRM",1}, {"STRN",0}, {"SA",1}, {"SB",1} } }, + { "Tesla K40m", { {"MWG",128}, {"NWG",128}, {"KWG",16}, {"MDIMC",16}, {"NDIMC",16}, {"MDIMA",32}, {"NDIMB",16}, {"KWI",8}, {"VWM",2}, {"VWN",1}, {"STRM",1}, {"STRN",0}, {"SA",1}, {"SB",1} } }, + { kDefault, { {"MWG",128}, {"NWG",64}, {"KWG",16}, {"MDIMC",16}, {"NDIMC",16}, {"MDIMA",16}, {"NDIMB",16}, {"KWI",2}, {"VWM",2}, {"VWN",1}, {"STRM",1}, {"STRN",0}, {"SA",1}, {"SB",1} } }, + } + }, + { // AMD GPUs + CL_DEVICE_TYPE_GPU, "AMD", { + { "Tahiti", { {"MWG",128}, {"NWG",128}, {"KWG",16}, {"MDIMC",16}, {"NDIMC",16}, {"MDIMA",16}, {"NDIMB",16}, {"KWI",2}, {"VWM",8}, {"VWN",4}, {"STRM",1}, {"STRN",1}, {"SA",0}, {"SB",0} } }, + } + }, + { // Intel GPUs + CL_DEVICE_TYPE_GPU, "Intel", { + { "Iris", { {"MWG",64}, {"NWG",64}, {"KWG",32}, {"MDIMC",8}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",8}, {"KWI",8}, {"VWM",4}, {"VWN",4}, {"STRM",1}, {"STRN",0}, {"SA",1}, {"SB",0} } }, + } + }, + { // Default + CL_DEVICE_TYPE_ALL, kDefault, { + { kDefault, { {"MWG",32}, {"NWG",32}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",16}, {"KWI",1}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",0}, {"SB",0} } }, + } + }, + } +}; + +// ================================================================================================= + +const Database::DatabaseEntry Database::XgemmDouble = { + "Xgemm", Precision::kDouble, { + { // NVIDIA GPUs + CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", { + { "GeForce GTX 480", { {"MWG",32}, {"NWG",64}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",16}, {"MDIMA",16}, {"NDIMB",32}, {"KWI",2}, {"VWM",1}, {"VWN",2}, {"STRM",1}, {"STRN",0}, {"SA",1}, {"SB",1} } }, + { "Tesla K20m", { {"MWG",64}, {"NWG",128}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",32}, {"MDIMA",32}, {"NDIMB",32}, {"KWI",8}, {"VWM",2}, {"VWN",4}, {"STRM",1}, {"STRN",1}, {"SA",1}, {"SB",1} } }, + { "Tesla K40m", { {"MWG",64}, {"NWG",64}, {"KWG",16}, {"MDIMC",16}, {"NDIMC",16}, {"MDIMA",16}, {"NDIMB",32}, {"KWI",2}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",0}, {"SA",1}, {"SB",1} } }, + { kDefault, { {"MWG",32}, {"NWG",64}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",16}, {"MDIMA",16}, {"NDIMB",32}, {"KWI",2}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",1}, {"SB",1} } }, + } + }, + { // AMD GPUs + CL_DEVICE_TYPE_GPU, "AMD", { + { "Tahiti", { {"MWG",128}, {"NWG",64}, {"KWG",16}, {"MDIMC",32}, {"NDIMC",8}, {"MDIMA",32}, {"NDIMB",16}, {"KWI",8}, {"VWM",1}, {"VWN",2}, {"STRM",1}, {"STRN",0}, {"SA",0}, {"SB",0} } }, + } + }, + { // Intel GPUs + CL_DEVICE_TYPE_GPU, "Intel", { + } + }, + { // Default + CL_DEVICE_TYPE_ALL, kDefault, { + { kDefault, { {"MWG",32}, {"NWG",32}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",16}, {"KWI",1}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",0}, {"SB",0} } }, + } + }, + } +}; + +// ================================================================================================= + +const Database::DatabaseEntry Database::XgemmComplexSingle = { + "Xgemm", Precision::kComplexSingle, { + { // NVIDIA GPUs + CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", { + { "GeForce GTX 480", { {"MWG",32}, {"NWG",64}, {"KWG",16}, {"MDIMC",16}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",16}, {"KWI",2}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",1}, {"SB",1} } }, + { "Tesla K20m", { {"MWG",32}, {"NWG",64}, {"KWG",16}, {"MDIMC",16}, {"NDIMC",8}, {"MDIMA",8}, {"NDIMB",8}, {"KWI",8}, {"VWM",2}, {"VWN",2}, {"STRM",1}, {"STRN",0}, {"SA",1}, {"SB",0} } }, + { "Tesla K40m", { {"MWG",32}, {"NWG",64}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",32}, {"MDIMA",32}, {"NDIMB",16}, {"KWI",8}, {"VWM",1}, {"VWN",1}, {"STRM",0}, {"STRN",1}, {"SA",1}, {"SB",1} } }, + { kDefault, { {"MWG",32}, {"NWG",64}, {"KWG",16}, {"MDIMC",16}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",16}, {"KWI",2}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",1}, {"SB",1} } }, + } + }, + { // AMD GPUs + CL_DEVICE_TYPE_GPU, "AMD", { + { "Tahiti", { {"MWG",16}, {"NWG",64}, {"KWG",32}, {"MDIMC",8}, {"NDIMC",8}, {"MDIMA",8}, {"NDIMB",16}, {"KWI",2}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",1}, {"SB",0} } }, + } + }, + { // Intel GPUs + CL_DEVICE_TYPE_GPU, "Intel", { + { "Iris", { {"MWG",64}, {"NWG",64}, {"KWG",32}, {"MDIMC",8}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",8}, {"KWI",8}, {"VWM",4}, {"VWN",4}, {"STRM",1}, {"STRN",0}, {"SA",1}, {"SB",0} } }, + } + }, + { // Default + CL_DEVICE_TYPE_ALL, kDefault, { + { kDefault, { {"MWG",32}, {"NWG",32}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",16}, {"KWI",1}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",0}, {"SB",0} } }, + } + }, + } +}; + +// ================================================================================================= + +const Database::DatabaseEntry Database::XgemmComplexDouble = { + "Xgemm", Precision::kComplexDouble, { + { // NVIDIA GPUs + CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", { + { "GeForce GTX 480", { {"MWG",16}, {"NWG",32}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",8}, {"KWI",2}, {"VWM",1}, {"VWN",4}, {"STRM",1}, {"STRN",0}, {"SA",0}, {"SB",0} } }, + { "Tesla K20m", { {"MWG",16}, {"NWG",128}, {"KWG",32}, {"MDIMC",8}, {"NDIMC",32}, {"MDIMA",8}, {"NDIMB",32}, {"KWI",2}, {"VWM",1}, {"VWN",4}, {"STRM",1}, {"STRN",1}, {"SA",1}, {"SB",0} } }, + { "Tesla K40m", { {"MWG",32}, {"NWG",32}, {"KWG",16}, {"MDIMC",32}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",32}, {"KWI",8}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",0}, {"SB",1} } }, + { kDefault, { {"MWG",16}, {"NWG",32}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",8}, {"KWI",2}, {"VWM",1}, {"VWN",4}, {"STRM",1}, {"STRN",0}, {"SA",0}, {"SB",0} } }, + } + }, + { // AMD GPUs + CL_DEVICE_TYPE_GPU, "AMD", { + { "Tahiti", { {"MWG",128}, {"NWG",32}, {"KWG",16}, {"MDIMC",32}, {"NDIMC",8}, {"MDIMA",32}, {"NDIMB",16}, {"KWI",8}, {"VWM",2}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",0}, {"SB",0} } }, + } + }, + { // Intel GPUs + CL_DEVICE_TYPE_GPU, "Intel", { + } + }, + { // Default + CL_DEVICE_TYPE_ALL, kDefault, { + { kDefault, { {"MWG",32}, {"NWG",32}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",16}, {"KWI",1}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",0}, {"SB",0} } }, + } + }, + } +}; +// ================================================================================================= +} // namespace clblast |