summary refs log tree commit diff
path: root/include/internal/database
diff options
context:
space:
mode:
author CNugteren <web@cedricnugteren.nl> 2015-05-30 12:30:43 +0200
committer CNugteren <web@cedricnugteren.nl> 2015-05-30 12:30:43 +0200
commit bc5a341dfe591946e925db315fc7d8c0c25c2938 (patch)
tree b216ab5eee4863e3807d92b5ddd19fa22197ed22 /include/internal/database
parent c7b054ea6747039f4405fd93da6e924f3e5c7f4b (diff)
Initial commit of preview version
Diffstat (limited to 'include/internal/database')
-rw-r--r-- include/internal/database/copy.h 130
-rw-r--r-- include/internal/database/pad.h 130
-rw-r--r-- include/internal/database/padtranspose.h 130
-rw-r--r-- include/internal/database/transpose.h 130
-rw-r--r-- include/internal/database/xaxpy.h 129
-rw-r--r-- include/internal/database/xgemm.h 133
6 files changed, 782 insertions, 0 deletions
diff --git a/include/internal/database/copy.h b/include/internal/database/copy.h
new file mode 100644
index 00000000..b9335fc9
--- /dev/null
+++ b/include/internal/database/copy.h
@@ -0,0 +1,130 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file populates the database with best-found tuning parameters for the Copy kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+// =================================================================================================
+
+const Database::DatabaseEntry Database::CopySingle = {
+ "Copy", Precision::kSingle, { // best-found parameters per device type/vendor group
+ { // NVIDIA GPUs: one entry per device name
+ CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", {
+ { "GeForce GTX 480", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",2} } },
+ { "Tesla K20m", { {"COPY_DIMX",8}, {"COPY_DIMY",16}, {"COPY_WPT",2}, {"COPY_VW",4} } },
+ { "Tesla K40m", { {"COPY_DIMX",16}, {"COPY_DIMY",16}, {"COPY_WPT",4}, {"COPY_VW",4} } },
+ }
+ },
+ { // AMD GPUs: one entry per device name
+ CL_DEVICE_TYPE_GPU, "AMD", {
+ { "Tahiti", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_WPT",4}, {"COPY_VW",2} } },
+ }
+ },
+ { // Intel GPUs: one entry per device name
+ CL_DEVICE_TYPE_GPU, "Intel", {
+ { "Iris", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",4} } },
+ }
+ },
+ { // Default: CL_DEVICE_TYPE_ALL group; presumably the fallback for unlisted devices (confirm)
+ CL_DEVICE_TYPE_ALL, kDefault, {
+ { kDefault, { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",1} } },
+ }
+ },
+ }
+};
+
+// =================================================================================================
+
+const Database::DatabaseEntry Database::CopyDouble = {
+ "Copy", Precision::kDouble, { // best-found parameters per device type/vendor group
+ { // NVIDIA GPUs: one entry per device name
+ CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", {
+ { "GeForce GTX 480", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",1} } },
+ { "Tesla K20m", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",2} } },
+ { "Tesla K40m", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",2} } },
+ }
+ },
+ { // AMD GPUs: one entry per device name
+ CL_DEVICE_TYPE_GPU, "AMD", {
+ { "Tahiti", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_WPT",2}, {"COPY_VW",4} } },
+ }
+ },
+ { // Intel GPUs: no device entries listed for this precision
+ CL_DEVICE_TYPE_GPU, "Intel", {
+ }
+ },
+ { // Default: CL_DEVICE_TYPE_ALL group; presumably the fallback for unlisted devices (confirm)
+ CL_DEVICE_TYPE_ALL, kDefault, {
+ { kDefault, { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",1} } },
+ }
+ },
+ }
+};
+
+// =================================================================================================
+
+const Database::DatabaseEntry Database::CopyComplexSingle = {
+ "Copy", Precision::kComplexSingle, { // best-found parameters per device type/vendor group
+ { // NVIDIA GPUs: one entry per device name
+ CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", {
+ { "GeForce GTX 480", { {"COPY_DIMX",16}, {"COPY_DIMY",16}, {"COPY_WPT",1}, {"COPY_VW",1} } },
+ { "Tesla K20m", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_WPT",2}, {"COPY_VW",1} } },
+ { "Tesla K40m", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",1} } },
+ }
+ },
+ { // AMD GPUs: one entry per device name
+ CL_DEVICE_TYPE_GPU, "AMD", {
+ { "Tahiti", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",1} } },
+ }
+ },
+ { // Intel GPUs: one entry per device name
+ CL_DEVICE_TYPE_GPU, "Intel", {
+ { "Iris", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",1} } },
+ }
+ },
+ { // Default: CL_DEVICE_TYPE_ALL group; presumably the fallback for unlisted devices (confirm)
+ CL_DEVICE_TYPE_ALL, kDefault, {
+ { kDefault, { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",1} } },
+ }
+ },
+ }
+};
+
+// =================================================================================================
+
+const Database::DatabaseEntry Database::CopyComplexDouble = {
+ "Copy", Precision::kComplexDouble, { // best-found parameters per device type/vendor group
+ { // NVIDIA GPUs: one entry per device name
+ CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", {
+ { "GeForce GTX 480", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",1} } },
+ { "Tesla K20m", { {"COPY_DIMX",8}, {"COPY_DIMY",32}, {"COPY_WPT",1}, {"COPY_VW",1} } },
+ { "Tesla K40m", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",1} } },
+ }
+ },
+ { // AMD GPUs: one entry per device name
+ CL_DEVICE_TYPE_GPU, "AMD", {
+ { "Tahiti", { {"COPY_DIMX",8}, {"COPY_DIMY",32}, {"COPY_WPT",4}, {"COPY_VW",2} } },
+ }
+ },
+ { // Intel GPUs: no device entries listed for this precision
+ CL_DEVICE_TYPE_GPU, "Intel", {
+ }
+ },
+ { // Default: CL_DEVICE_TYPE_ALL group; presumably the fallback for unlisted devices (confirm)
+ CL_DEVICE_TYPE_ALL, kDefault, {
+ { kDefault, { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",1} } },
+ }
+ },
+ }
+};
+
+// =================================================================================================
+} // namespace clblast
diff --git a/include/internal/database/pad.h b/include/internal/database/pad.h
new file mode 100644
index 00000000..5af75308
--- /dev/null
+++ b/include/internal/database/pad.h
@@ -0,0 +1,130 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file populates the database with best-found tuning parameters for the Pad kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+// =================================================================================================
+
+const Database::DatabaseEntry Database::PadSingle = {
+ "Pad", Precision::kSingle, { // best-found parameters per device type/vendor group
+ { // NVIDIA GPUs: one entry per device name
+ CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", {
+ { "GeForce GTX 480", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",4} } },
+ { "Tesla K20m", { {"PAD_DIMX",16}, {"PAD_DIMY",32}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
+ { "Tesla K40m", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } },
+ }
+ },
+ { // AMD GPUs: one entry per device name
+ CL_DEVICE_TYPE_GPU, "AMD", {
+ { "Tahiti", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } },
+ }
+ },
+ { // Intel GPUs: one entry per device name
+ CL_DEVICE_TYPE_GPU, "Intel", {
+ { "Iris", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } },
+ }
+ },
+ { // Default: CL_DEVICE_TYPE_ALL group; presumably the fallback for unlisted devices (confirm)
+ CL_DEVICE_TYPE_ALL, kDefault, {
+ { kDefault, { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
+ }
+ },
+ }
+};
+
+// =================================================================================================
+
+const Database::DatabaseEntry Database::PadDouble = {
+ "Pad", Precision::kDouble, { // best-found parameters per device type/vendor group
+ { // NVIDIA GPUs: one entry per device name
+ CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", {
+ { "GeForce GTX 480", { {"PAD_DIMX",16}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
+ { "Tesla K20m", { {"PAD_DIMX",16}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
+ { "Tesla K40m", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
+ }
+ },
+ { // AMD GPUs: one entry per device name
+ CL_DEVICE_TYPE_GPU, "AMD", {
+ { "Tahiti", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } },
+ }
+ },
+ { // Intel GPUs: no device entries listed for this precision
+ CL_DEVICE_TYPE_GPU, "Intel", {
+ }
+ },
+ { // Default: CL_DEVICE_TYPE_ALL group; presumably the fallback for unlisted devices (confirm)
+ CL_DEVICE_TYPE_ALL, kDefault, {
+ { kDefault, { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
+ }
+ },
+ }
+};
+
+// =================================================================================================
+
+const Database::DatabaseEntry Database::PadComplexSingle = {
+ "Pad", Precision::kComplexSingle, { // best-found parameters per device type/vendor group
+ { // NVIDIA GPUs: one entry per device name
+ CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", {
+ { "GeForce GTX 480", { {"PAD_DIMX",16}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
+ { "Tesla K20m", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } },
+ { "Tesla K40m", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
+ }
+ },
+ { // AMD GPUs: one entry per device name
+ CL_DEVICE_TYPE_GPU, "AMD", {
+ { "Tahiti", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
+ }
+ },
+ { // Intel GPUs: one entry per device name
+ CL_DEVICE_TYPE_GPU, "Intel", {
+ { "Iris", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
+ }
+ },
+ { // Default: CL_DEVICE_TYPE_ALL group; presumably the fallback for unlisted devices (confirm)
+ CL_DEVICE_TYPE_ALL, kDefault, {
+ { kDefault, { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
+ }
+ },
+ }
+};
+
+// =================================================================================================
+
+const Database::DatabaseEntry Database::PadComplexDouble = {
+ "Pad", Precision::kComplexDouble, { // best-found parameters per device type/vendor group
+ { // NVIDIA GPUs: one entry per device name
+ CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", {
+ { "GeForce GTX 480", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
+ { "Tesla K20m", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
+ { "Tesla K40m", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
+ }
+ },
+ { // AMD GPUs: one entry per device name
+ CL_DEVICE_TYPE_GPU, "AMD", {
+ { "Tahiti", { {"PAD_DIMX",8}, {"PAD_DIMY",16}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
+ }
+ },
+ { // Intel GPUs: no device entries listed for this precision
+ CL_DEVICE_TYPE_GPU, "Intel", {
+ }
+ },
+ { // Default: CL_DEVICE_TYPE_ALL group; presumably the fallback for unlisted devices (confirm)
+ CL_DEVICE_TYPE_ALL, kDefault, {
+ { kDefault, { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
+ }
+ },
+ }
+};
+
+// =================================================================================================
+} // namespace clblast
diff --git a/include/internal/database/padtranspose.h b/include/internal/database/padtranspose.h
new file mode 100644
index 00000000..f1127d60
--- /dev/null
+++ b/include/internal/database/padtranspose.h
@@ -0,0 +1,130 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file populates the database with best-found tuning parameters for the PadTranspose kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+// =================================================================================================
+
+const Database::DatabaseEntry Database::PadTraSingle = {
+ "PadTranspose", Precision::kSingle, { // best-found parameters per device type/vendor group
+ { // NVIDIA GPUs: one entry per device name
+ CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", {
+ { "GeForce GTX 480", { {"PADTRA_TILE",16}, {"PADTRA_WPT",2}, {"PADTRA_PAD",1} } },
+ { "Tesla K20m", { {"PADTRA_TILE",16}, {"PADTRA_WPT",2}, {"PADTRA_PAD",1} } },
+ { "Tesla K40m", { {"PADTRA_TILE",32}, {"PADTRA_WPT",2}, {"PADTRA_PAD",1} } },
+ }
+ },
+ { // AMD GPUs: one entry per device name
+ CL_DEVICE_TYPE_GPU, "AMD", {
+ { "Tahiti", { {"PADTRA_TILE",16}, {"PADTRA_WPT",4}, {"PADTRA_PAD",0} } },
+ }
+ },
+ { // Intel GPUs: one entry per device name
+ CL_DEVICE_TYPE_GPU, "Intel", {
+ { "Iris", { {"PADTRA_TILE",16}, {"PADTRA_WPT",2}, {"PADTRA_PAD",0} } },
+ }
+ },
+ { // Default: CL_DEVICE_TYPE_ALL group; presumably the fallback for unlisted devices (confirm)
+ CL_DEVICE_TYPE_ALL, kDefault, {
+ { kDefault, { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",0} } },
+ }
+ },
+ }
+};
+
+// =================================================================================================
+
+const Database::DatabaseEntry Database::PadTraDouble = {
+ "PadTranspose", Precision::kDouble, { // best-found parameters per device type/vendor group
+ { // NVIDIA GPUs: one entry per device name
+ CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", {
+ { "GeForce GTX 480", { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",1} } },
+ { "Tesla K20m", { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",1} } },
+ { "Tesla K40m", { {"PADTRA_TILE",16}, {"PADTRA_WPT",2}, {"PADTRA_PAD",1} } },
+ }
+ },
+ { // AMD GPUs: one entry per device name
+ CL_DEVICE_TYPE_GPU, "AMD", {
+ { "Tahiti", { {"PADTRA_TILE",8}, {"PADTRA_WPT",4}, {"PADTRA_PAD",0} } },
+ }
+ },
+ { // Intel GPUs: no device entries listed for this precision
+ CL_DEVICE_TYPE_GPU, "Intel", {
+ }
+ },
+ { // Default: CL_DEVICE_TYPE_ALL group; presumably the fallback for unlisted devices (confirm)
+ CL_DEVICE_TYPE_ALL, kDefault, {
+ { kDefault, { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",0} } },
+ }
+ },
+ }
+};
+
+// =================================================================================================
+
+const Database::DatabaseEntry Database::PadTraComplexSingle = {
+ "PadTranspose", Precision::kComplexSingle, { // best-found parameters per device type/vendor group
+ { // NVIDIA GPUs: one entry per device name
+ CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", {
+ { "GeForce GTX 480", { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",1} } },
+ { "Tesla K20m", { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",1} } },
+ { "Tesla K40m", { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",0} } },
+ }
+ },
+ { // AMD GPUs: one entry per device name
+ CL_DEVICE_TYPE_GPU, "AMD", {
+ { "Tahiti", { {"PADTRA_TILE",16}, {"PADTRA_WPT",2}, {"PADTRA_PAD",0} } },
+ }
+ },
+ { // Intel GPUs: one entry per device name
+ CL_DEVICE_TYPE_GPU, "Intel", {
+ { "Iris", { {"PADTRA_TILE",16}, {"PADTRA_WPT",2}, {"PADTRA_PAD",0} } },
+ }
+ },
+ { // Default: CL_DEVICE_TYPE_ALL group; presumably the fallback for unlisted devices (confirm)
+ CL_DEVICE_TYPE_ALL, kDefault, {
+ { kDefault, { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",0} } },
+ }
+ },
+ }
+};
+
+// =================================================================================================
+
+const Database::DatabaseEntry Database::PadTraComplexDouble = {
+ "PadTranspose", Precision::kComplexDouble, { // best-found parameters per device type/vendor group
+ { // NVIDIA GPUs: one entry per device name
+ CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", {
+ { "GeForce GTX 480", { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",1} } },
+ { "Tesla K20m", { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",1} } },
+ { "Tesla K40m", { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",1} } },
+ }
+ },
+ { // AMD GPUs: one entry per device name
+ CL_DEVICE_TYPE_GPU, "AMD", {
+ { "Tahiti", { {"PADTRA_TILE",8}, {"PADTRA_WPT",2}, {"PADTRA_PAD",1} } },
+ }
+ },
+ { // Intel GPUs: no device entries listed for this precision
+ CL_DEVICE_TYPE_GPU, "Intel", {
+ }
+ },
+ { // Default: CL_DEVICE_TYPE_ALL group; presumably the fallback for unlisted devices (confirm)
+ CL_DEVICE_TYPE_ALL, kDefault, {
+ { kDefault, { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",0} } },
+ }
+ },
+ }
+};
+
+// =================================================================================================
+} // namespace clblast
diff --git a/include/internal/database/transpose.h b/include/internal/database/transpose.h
new file mode 100644
index 00000000..0814eb8a
--- /dev/null
+++ b/include/internal/database/transpose.h
@@ -0,0 +1,130 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file populates the database with best-found tuning parameters for the Transpose kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+// =================================================================================================
+
+const Database::DatabaseEntry Database::TraSingle = {
+ "Transpose", Precision::kSingle, { // best-found parameters per device type/vendor group
+ { // NVIDIA GPUs: one entry per device name
+ CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", {
+ { "GeForce GTX 480", { {"TRA_DIM",16}, {"TRA_WPT",2}, {"TRA_PAD",1} } },
+ { "Tesla K20m", { {"TRA_DIM",16}, {"TRA_WPT",2}, {"TRA_PAD",1} } },
+ { "Tesla K40m", { {"TRA_DIM",16}, {"TRA_WPT",2}, {"TRA_PAD",1} } },
+ }
+ },
+ { // AMD GPUs: one entry per device name
+ CL_DEVICE_TYPE_GPU, "AMD", {
+ { "Tahiti", { {"TRA_DIM",8}, {"TRA_WPT",8}, {"TRA_PAD",0} } },
+ }
+ },
+ { // Intel GPUs: one entry per device name
+ CL_DEVICE_TYPE_GPU, "Intel", {
+ { "Iris", { {"TRA_DIM",8}, {"TRA_WPT",4}, {"TRA_PAD",0} } },
+ }
+ },
+ { // Default: CL_DEVICE_TYPE_ALL group; presumably the fallback for unlisted devices (confirm)
+ CL_DEVICE_TYPE_ALL, kDefault, {
+ { kDefault, { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",0} } },
+ }
+ },
+ }
+};
+
+// =================================================================================================
+
+const Database::DatabaseEntry Database::TraDouble = {
+ "Transpose", Precision::kDouble, { // best-found parameters per device type/vendor group
+ { // NVIDIA GPUs: one entry per device name
+ CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", {
+ { "GeForce GTX 480", { {"TRA_DIM",8}, {"TRA_WPT",2}, {"TRA_PAD",1} } },
+ { "Tesla K20m", { {"TRA_DIM",16}, {"TRA_WPT",2}, {"TRA_PAD",1} } },
+ { "Tesla K40m", { {"TRA_DIM",16}, {"TRA_WPT",2}, {"TRA_PAD",1} } },
+ }
+ },
+ { // AMD GPUs: one entry per device name
+ CL_DEVICE_TYPE_GPU, "AMD", {
+ { "Tahiti", { {"TRA_DIM",8}, {"TRA_WPT",8}, {"TRA_PAD",0} } },
+ }
+ },
+ { // Intel GPUs: no device entries listed for this precision
+ CL_DEVICE_TYPE_GPU, "Intel", {
+ }
+ },
+ { // Default: CL_DEVICE_TYPE_ALL group; presumably the fallback for unlisted devices (confirm)
+ CL_DEVICE_TYPE_ALL, kDefault, {
+ { kDefault, { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",0} } },
+ }
+ },
+ }
+};
+
+// =================================================================================================
+
+const Database::DatabaseEntry Database::TraComplexSingle = {
+ "Transpose", Precision::kComplexSingle, { // best-found parameters per device type/vendor group
+ { // NVIDIA GPUs: one entry per device name
+ CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", {
+ { "GeForce GTX 480", { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",1} } },
+ { "Tesla K20m", { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",0} } },
+ { "Tesla K40m", { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",1} } },
+ }
+ },
+ { // AMD GPUs: one entry per device name
+ CL_DEVICE_TYPE_GPU, "AMD", {
+ { "Tahiti", { {"TRA_DIM",8}, {"TRA_WPT",2}, {"TRA_PAD",1} } },
+ }
+ },
+ { // Intel GPUs: one entry per device name
+ CL_DEVICE_TYPE_GPU, "Intel", {
+ { "Iris", { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",1} } },
+ }
+ },
+ { // Default: CL_DEVICE_TYPE_ALL group; presumably the fallback for unlisted devices (confirm)
+ CL_DEVICE_TYPE_ALL, kDefault, {
+ { kDefault, { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",0} } },
+ }
+ },
+ }
+};
+
+// =================================================================================================
+
+const Database::DatabaseEntry Database::TraComplexDouble = {
+ "Transpose", Precision::kComplexDouble, { // best-found parameters per device type/vendor group
+ { // NVIDIA GPUs: one entry per device name
+ CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", {
+ { "GeForce GTX 480", { {"TRA_DIM",8}, {"TRA_WPT",1}, {"TRA_PAD",1} } },
+ { "Tesla K20m", { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",1} } },
+ { "Tesla K40m", { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",1} } },
+ }
+ },
+ { // AMD GPUs: one entry per device name
+ CL_DEVICE_TYPE_GPU, "AMD", {
+ { "Tahiti", { {"TRA_DIM",8}, {"TRA_WPT",1}, {"TRA_PAD",0} } },
+ }
+ },
+ { // Intel GPUs: no device entries listed for this precision
+ CL_DEVICE_TYPE_GPU, "Intel", {
+ }
+ },
+ { // Default: CL_DEVICE_TYPE_ALL group; presumably the fallback for unlisted devices (confirm)
+ CL_DEVICE_TYPE_ALL, kDefault, {
+ { kDefault, { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",0} } },
+ }
+ },
+ }
+};
+
+// =================================================================================================
+} // namespace clblast
diff --git a/include/internal/database/xaxpy.h b/include/internal/database/xaxpy.h
new file mode 100644
index 00000000..c331945a
--- /dev/null
+++ b/include/internal/database/xaxpy.h
@@ -0,0 +1,129 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file populates the database with best-found tuning parameters for the Xaxpy kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+// =================================================================================================
+
+const Database::DatabaseEntry Database::XaxpySingle = {
+ "Xaxpy", Precision::kSingle, { // best-found parameters per device type/vendor group
+ { // NVIDIA GPUs: one entry per device name
+ CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", {
+ { "GeForce GTX 480", { {"WGS",128}, {"WPT",1}, {"VW",2} } },
+ { "Tesla K20m", { {"WGS",128}, {"WPT",2}, {"VW",2} } },
+ { "Tesla K40m", { {"WGS",128}, {"WPT",1}, {"VW",4} } },
+ }
+ },
+ { // AMD GPUs: one entry per device name
+ CL_DEVICE_TYPE_GPU, "AMD", {
+ { "Tahiti", { {"WGS",64}, {"WPT",1}, {"VW",2} } },
+ }
+ },
+ { // Intel GPUs: one entry per device name
+ CL_DEVICE_TYPE_GPU, "Intel", {
+ { "Iris", { {"WGS",512}, {"WPT",1}, {"VW",1} } },
+ }
+ },
+ { // Default: CL_DEVICE_TYPE_ALL group; presumably the fallback for unlisted devices (confirm)
+ CL_DEVICE_TYPE_ALL, kDefault, {
+ { kDefault, { {"WGS",128}, {"WPT",1}, {"VW",1} } },
+ }
+ },
+ }
+};
+
+// =================================================================================================
+
+const Database::DatabaseEntry Database::XaxpyDouble = {
+ "Xaxpy", Precision::kDouble, { // best-found parameters per device type/vendor group
+ { // NVIDIA GPUs: one entry per device name
+ CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", {
+ { "GeForce GTX 480", { {"WGS",128}, {"WPT",1}, {"VW",1} } },
+ { "Tesla K20m", { {"WGS",512}, {"WPT",1}, {"VW",2} } },
+ { "Tesla K40m", { {"WGS",64}, {"WPT",1}, {"VW",2} } },
+ }
+ },
+ { // AMD GPUs: one entry per device name
+ CL_DEVICE_TYPE_GPU, "AMD", {
+ { "Tahiti", { {"WGS",256}, {"WPT",1}, {"VW",1} } },
+ }
+ },
+ { // Intel GPUs: no device entries listed for this precision
+ CL_DEVICE_TYPE_GPU, "Intel", {
+ }
+ },
+ { // Default: CL_DEVICE_TYPE_ALL group; presumably the fallback for unlisted devices (confirm)
+ CL_DEVICE_TYPE_ALL, kDefault, {
+ { kDefault, { {"WGS",128}, {"WPT",1}, {"VW",1} } },
+ }
+ },
+ }
+};
+// =================================================================================================
+
+const Database::DatabaseEntry Database::XaxpyComplexSingle = {
+ "Xaxpy", Precision::kComplexSingle, { // best-found parameters per device type/vendor group
+ { // NVIDIA GPUs: one entry per device name
+ CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", {
+ { "GeForce GTX 480", { {"WGS",256}, {"WPT",1}, {"VW",1} } },
+ { "Tesla K20m", { {"WGS",128}, {"WPT",1}, {"VW",1} } },
+ { "Tesla K40m", { {"WGS",128}, {"WPT",2}, {"VW",1} } },
+ }
+ },
+ { // AMD GPUs: one entry per device name
+ CL_DEVICE_TYPE_GPU, "AMD", {
+ { "Tahiti", { {"WGS",64}, {"WPT",1}, {"VW",1} } },
+ }
+ },
+ { // Intel GPUs: one entry per device name
+ CL_DEVICE_TYPE_GPU, "Intel", {
+ { "Iris", { {"WGS",256}, {"WPT",1}, {"VW",1} } },
+ }
+ },
+ { // Default: CL_DEVICE_TYPE_ALL group; presumably the fallback for unlisted devices (confirm)
+ CL_DEVICE_TYPE_ALL, kDefault, {
+ { kDefault, { {"WGS",128}, {"WPT",1}, {"VW",1} } },
+ }
+ },
+ }
+};
+
+// =================================================================================================
+
+const Database::DatabaseEntry Database::XaxpyComplexDouble = {
+ "Xaxpy", Precision::kComplexDouble, { // best-found parameters per device type/vendor group
+ { // NVIDIA GPUs: one entry per device name
+ CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", {
+ { "GeForce GTX 480", { {"WGS",128}, {"WPT",2}, {"VW",1} } },
+ { "Tesla K20m", { {"WGS",256}, {"WPT",1}, {"VW",1} } },
+ { "Tesla K40m", { {"WGS",64}, {"WPT",2}, {"VW",1} } },
+ }
+ },
+ { // AMD GPUs: one entry per device name
+ CL_DEVICE_TYPE_GPU, "AMD", {
+ { "Tahiti", { {"WGS",64}, {"WPT",1}, {"VW",1} } },
+ }
+ },
+ { // Intel GPUs: no device entries listed for this precision
+ CL_DEVICE_TYPE_GPU, "Intel", {
+ }
+ },
+ { // Default: CL_DEVICE_TYPE_ALL group; presumably the fallback for unlisted devices (confirm)
+ CL_DEVICE_TYPE_ALL, kDefault, {
+ { kDefault, { {"WGS",128}, {"WPT",1}, {"VW",1} } },
+ }
+ },
+ }
+};
+
+// =================================================================================================
+} // namespace clblast
diff --git a/include/internal/database/xgemm.h b/include/internal/database/xgemm.h
new file mode 100644
index 00000000..edf41e12
--- /dev/null
+++ b/include/internal/database/xgemm.h
@@ -0,0 +1,133 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file populates the database with best-found tuning parameters for the Xgemm kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+// =================================================================================================
+
+const Database::DatabaseEntry Database::XgemmSingle = {
+ "Xgemm", Precision::kSingle, { // best-found parameters per device type/vendor group
+ { // NVIDIA GPUs: one entry per device name, plus a vendor-level kDefault entry
+ CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", {
+ { "GeForce GTX 480", { {"MWG",128}, {"NWG",64}, {"KWG",32}, {"MDIMC",16}, {"NDIMC",16}, {"MDIMA",16}, {"NDIMB",16}, {"KWI",2}, {"VWM",2}, {"VWN",2}, {"STRM",1}, {"STRN",0}, {"SA",1}, {"SB",1} } },
+ { "Tesla K20m", { {"MWG",128}, {"NWG",64}, {"KWG",16}, {"MDIMC",16}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",32}, {"KWI",2}, {"VWM",4}, {"VWN",1}, {"STRM",1}, {"STRN",0}, {"SA",1}, {"SB",1} } },
+ { "Tesla K40m", { {"MWG",128}, {"NWG",128}, {"KWG",16}, {"MDIMC",16}, {"NDIMC",16}, {"MDIMA",32}, {"NDIMB",16}, {"KWI",8}, {"VWM",2}, {"VWN",1}, {"STRM",1}, {"STRN",0}, {"SA",1}, {"SB",1} } },
+ { kDefault, { {"MWG",128}, {"NWG",64}, {"KWG",16}, {"MDIMC",16}, {"NDIMC",16}, {"MDIMA",16}, {"NDIMB",16}, {"KWI",2}, {"VWM",2}, {"VWN",1}, {"STRM",1}, {"STRN",0}, {"SA",1}, {"SB",1} } },
+ }
+ },
+ { // AMD GPUs: one entry per device name
+ CL_DEVICE_TYPE_GPU, "AMD", {
+ { "Tahiti", { {"MWG",128}, {"NWG",128}, {"KWG",16}, {"MDIMC",16}, {"NDIMC",16}, {"MDIMA",16}, {"NDIMB",16}, {"KWI",2}, {"VWM",8}, {"VWN",4}, {"STRM",1}, {"STRN",1}, {"SA",0}, {"SB",0} } },
+ }
+ },
+ { // Intel GPUs: one entry per device name
+ CL_DEVICE_TYPE_GPU, "Intel", {
+ { "Iris", { {"MWG",64}, {"NWG",64}, {"KWG",32}, {"MDIMC",8}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",8}, {"KWI",8}, {"VWM",4}, {"VWN",4}, {"STRM",1}, {"STRN",0}, {"SA",1}, {"SB",0} } },
+ }
+ },
+ { // Default: CL_DEVICE_TYPE_ALL group; presumably the fallback for unlisted devices (confirm)
+ CL_DEVICE_TYPE_ALL, kDefault, {
+ { kDefault, { {"MWG",32}, {"NWG",32}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",16}, {"KWI",1}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",0}, {"SB",0} } },
+ }
+ },
+ }
+};
+
+// =================================================================================================
+
+const Database::DatabaseEntry Database::XgemmDouble = {
+ "Xgemm", Precision::kDouble, { // best-found parameters per device type/vendor group
+ { // NVIDIA GPUs: one entry per device name, plus a vendor-level kDefault entry
+ CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", {
+ { "GeForce GTX 480", { {"MWG",32}, {"NWG",64}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",16}, {"MDIMA",16}, {"NDIMB",32}, {"KWI",2}, {"VWM",1}, {"VWN",2}, {"STRM",1}, {"STRN",0}, {"SA",1}, {"SB",1} } },
+ { "Tesla K20m", { {"MWG",64}, {"NWG",128}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",32}, {"MDIMA",32}, {"NDIMB",32}, {"KWI",8}, {"VWM",2}, {"VWN",4}, {"STRM",1}, {"STRN",1}, {"SA",1}, {"SB",1} } },
+ { "Tesla K40m", { {"MWG",64}, {"NWG",64}, {"KWG",16}, {"MDIMC",16}, {"NDIMC",16}, {"MDIMA",16}, {"NDIMB",32}, {"KWI",2}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",0}, {"SA",1}, {"SB",1} } },
+ { kDefault, { {"MWG",32}, {"NWG",64}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",16}, {"MDIMA",16}, {"NDIMB",32}, {"KWI",2}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",1}, {"SB",1} } },
+ }
+ },
+ { // AMD GPUs: one entry per device name
+ CL_DEVICE_TYPE_GPU, "AMD", {
+ { "Tahiti", { {"MWG",128}, {"NWG",64}, {"KWG",16}, {"MDIMC",32}, {"NDIMC",8}, {"MDIMA",32}, {"NDIMB",16}, {"KWI",8}, {"VWM",1}, {"VWN",2}, {"STRM",1}, {"STRN",0}, {"SA",0}, {"SB",0} } },
+ }
+ },
+ { // Intel GPUs: no device entries listed for this precision
+ CL_DEVICE_TYPE_GPU, "Intel", {
+ }
+ },
+ { // Default: CL_DEVICE_TYPE_ALL group; presumably the fallback for unlisted devices (confirm)
+ CL_DEVICE_TYPE_ALL, kDefault, {
+ { kDefault, { {"MWG",32}, {"NWG",32}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",16}, {"KWI",1}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",0}, {"SB",0} } },
+ }
+ },
+ }
+};
+
+// =================================================================================================
+
+const Database::DatabaseEntry Database::XgemmComplexSingle = {
+ "Xgemm", Precision::kComplexSingle, { // best-found parameters per device type/vendor group
+ { // NVIDIA GPUs: one entry per device name, plus a vendor-level kDefault entry
+ CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", {
+ { "GeForce GTX 480", { {"MWG",32}, {"NWG",64}, {"KWG",16}, {"MDIMC",16}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",16}, {"KWI",2}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",1}, {"SB",1} } },
+ { "Tesla K20m", { {"MWG",32}, {"NWG",64}, {"KWG",16}, {"MDIMC",16}, {"NDIMC",8}, {"MDIMA",8}, {"NDIMB",8}, {"KWI",8}, {"VWM",2}, {"VWN",2}, {"STRM",1}, {"STRN",0}, {"SA",1}, {"SB",0} } },
+ { "Tesla K40m", { {"MWG",32}, {"NWG",64}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",32}, {"MDIMA",32}, {"NDIMB",16}, {"KWI",8}, {"VWM",1}, {"VWN",1}, {"STRM",0}, {"STRN",1}, {"SA",1}, {"SB",1} } },
+ { kDefault, { {"MWG",32}, {"NWG",64}, {"KWG",16}, {"MDIMC",16}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",16}, {"KWI",2}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",1}, {"SB",1} } },
+ }
+ },
+ { // AMD GPUs: one entry per device name
+ CL_DEVICE_TYPE_GPU, "AMD", {
+ { "Tahiti", { {"MWG",16}, {"NWG",64}, {"KWG",32}, {"MDIMC",8}, {"NDIMC",8}, {"MDIMA",8}, {"NDIMB",16}, {"KWI",2}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",1}, {"SB",0} } },
+ }
+ },
+ { // Intel GPUs: one entry per device name
+ CL_DEVICE_TYPE_GPU, "Intel", {
+ { "Iris", { {"MWG",64}, {"NWG",64}, {"KWG",32}, {"MDIMC",8}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",8}, {"KWI",8}, {"VWM",4}, {"VWN",4}, {"STRM",1}, {"STRN",0}, {"SA",1}, {"SB",0} } },
+ }
+ },
+ { // Default: CL_DEVICE_TYPE_ALL group; presumably the fallback for unlisted devices (confirm)
+ CL_DEVICE_TYPE_ALL, kDefault, {
+ { kDefault, { {"MWG",32}, {"NWG",32}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",16}, {"KWI",1}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",0}, {"SB",0} } },
+ }
+ },
+ }
+};
+
+// =================================================================================================
+
+const Database::DatabaseEntry Database::XgemmComplexDouble = {
+ "Xgemm", Precision::kComplexDouble, { // best-found parameters per device type/vendor group
+ { // NVIDIA GPUs: one entry per device name, plus a vendor-level kDefault entry
+ CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", {
+ { "GeForce GTX 480", { {"MWG",16}, {"NWG",32}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",8}, {"KWI",2}, {"VWM",1}, {"VWN",4}, {"STRM",1}, {"STRN",0}, {"SA",0}, {"SB",0} } },
+ { "Tesla K20m", { {"MWG",16}, {"NWG",128}, {"KWG",32}, {"MDIMC",8}, {"NDIMC",32}, {"MDIMA",8}, {"NDIMB",32}, {"KWI",2}, {"VWM",1}, {"VWN",4}, {"STRM",1}, {"STRN",1}, {"SA",1}, {"SB",0} } },
+ { "Tesla K40m", { {"MWG",32}, {"NWG",32}, {"KWG",16}, {"MDIMC",32}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",32}, {"KWI",8}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",0}, {"SB",1} } },
+ { kDefault, { {"MWG",16}, {"NWG",32}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",8}, {"KWI",2}, {"VWM",1}, {"VWN",4}, {"STRM",1}, {"STRN",0}, {"SA",0}, {"SB",0} } },
+ }
+ },
+ { // AMD GPUs: one entry per device name
+ CL_DEVICE_TYPE_GPU, "AMD", {
+ { "Tahiti", { {"MWG",128}, {"NWG",32}, {"KWG",16}, {"MDIMC",32}, {"NDIMC",8}, {"MDIMA",32}, {"NDIMB",16}, {"KWI",8}, {"VWM",2}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",0}, {"SB",0} } },
+ }
+ },
+ { // Intel GPUs: no device entries listed for this precision
+ CL_DEVICE_TYPE_GPU, "Intel", {
+ }
+ },
+ { // Default: CL_DEVICE_TYPE_ALL group; presumably the fallback for unlisted devices (confirm)
+ CL_DEVICE_TYPE_ALL, kDefault, {
+ { kDefault, { {"MWG",32}, {"NWG",32}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",16}, {"KWI",1}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",0}, {"SB",0} } },
+ }
+ },
+ }
+};
+// =================================================================================================
+} // namespace clblast