From 83c6a517659ee1d005da32a7593e8b5fdb7827ee Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sat, 12 Mar 2016 15:10:35 +0100 Subject: Added tuning results for the ARM Mali-T628 GPU --- include/internal/database/copy.h | 18 ++++++++++++++++++ include/internal/database/pad.h | 24 ++++++++++++++++++++++++ include/internal/database/padtranspose.h | 24 ++++++++++++++++++++++++ include/internal/database/transpose.h | 24 ++++++++++++++++++++++++ include/internal/database/xaxpy.h | 24 ++++++++++++++++++++++++ include/internal/database/xdot.h | 24 ++++++++++++++++++++++++ include/internal/database/xgemm.h | 24 ++++++++++++++++++++++++ include/internal/database/xger.h | 26 +++++++++++++++++++++++++- 8 files changed, 187 insertions(+), 1 deletion(-) (limited to 'include/internal') diff --git a/include/internal/database/copy.h b/include/internal/database/copy.h index 820e0f04..42c9c21c 100644 --- a/include/internal/database/copy.h +++ b/include/internal/database/copy.h @@ -23,6 +23,12 @@ const Database::DatabaseEntry Database::CopySingle = { { "default", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",4} } }, + { "default", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",4} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"COPY_DIMX",32}, {"COPY_DIMY",16}, {"COPY_VW",8}, {"COPY_WPT",2} } }, @@ -127,6 +133,12 @@ const Database::DatabaseEntry Database::CopyDouble = { { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",8}, {"COPY_WPT",2} } }, + { "default", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",8}, {"COPY_WPT",2} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",8}, {"COPY_WPT",1} } }, @@ -173,6 +185,12 @@ const Database::DatabaseEntry Database::CopyComplexDouble = { { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",16}, {"COPY_VW",1}, {"COPY_WPT",1} } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } }, + { "default", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",8}, {"COPY_WPT",1} } }, diff --git a/include/internal/database/pad.h b/include/internal/database/pad.h index c08a60d6..8e8ae966 100644 --- a/include/internal/database/pad.h +++ b/include/internal/database/pad.h @@ -23,6 +23,12 @@ const Database::DatabaseEntry Database::PadSingle = { { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",4} } }, + { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",4} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } }, @@ -76,6 +82,12 @@ const Database::DatabaseEntry Database::PadComplexSingle = { { "default", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",4} } }, + { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",4} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",2} } }, @@ -129,6 +141,12 @@ const Database::DatabaseEntry Database::PadDouble = { { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",4}, {"PAD_WPTY",2} } }, + { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",4}, {"PAD_WPTY",2} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } }, @@ -175,6 +193,12 @@ const Database::DatabaseEntry Database::PadComplexDouble = { { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, diff --git a/include/internal/database/padtranspose.h b/include/internal/database/padtranspose.h index 5a89869f..6e210e84 100644 --- a/include/internal/database/padtranspose.h +++ b/include/internal/database/padtranspose.h @@ -23,6 +23,12 @@ const Database::DatabaseEntry Database::PadtransposeSingle = { { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",4} } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",2} } }, + { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",2} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, @@ -76,6 +82,12 @@ const Database::DatabaseEntry Database::PadtransposeComplexSingle = { { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, + { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"PADTRA_PAD",1}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } }, @@ -129,6 +141,12 @@ const Database::DatabaseEntry Database::PadtransposeDouble = { { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, + { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"PADTRA_PAD",1}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } }, @@ -175,6 +193,12 @@ const Database::DatabaseEntry Database::PadtransposeComplexDouble = { { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",2} } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",1} } }, + { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",1} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"PADTRA_PAD",1}, {"PADTRA_TILE",8}, {"PADTRA_WPT",2} } }, diff --git a/include/internal/database/transpose.h b/include/internal/database/transpose.h index 1948e0d6..005a6921 100644 --- a/include/internal/database/transpose.h +++ b/include/internal/database/transpose.h @@ -23,6 +23,12 @@ const Database::DatabaseEntry Database::TransposeSingle = { { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } }, + { "default", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",8} } }, @@ -76,6 +82,12 @@ const Database::DatabaseEntry Database::TransposeComplexSingle = { { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } }, + { "default", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } }, @@ -123,6 +135,12 @@ const Database::DatabaseEntry Database::TransposeDouble = { { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, + { "default", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, @@ -169,6 +187,12 @@ const Database::DatabaseEntry Database::TransposeComplexDouble = { { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, + { "default", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, diff --git a/include/internal/database/xaxpy.h b/include/internal/database/xaxpy.h index 491dfc68..aa0c2d2f 100644 --- a/include/internal/database/xaxpy.h +++ b/include/internal/database/xaxpy.h @@ -23,6 +23,12 @@ const Database::DatabaseEntry Database::XaxpySingle = { { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { {"VW",4}, {"WGS",256}, {"WPT",1} } }, + { "default", { {"VW",4}, {"WGS",256}, {"WPT",1} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"VW",1}, {"WGS",512}, {"WPT",1} } }, @@ -76,6 +82,12 @@ const Database::DatabaseEntry Database::XaxpyComplexSingle = { { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { {"VW",1}, {"WGS",256}, {"WPT",1} } }, + { "default", { {"VW",1}, {"WGS",256}, {"WPT",1} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"VW",4}, {"WGS",256}, {"WPT",1} } }, @@ -129,6 +141,12 @@ const Database::DatabaseEntry Database::XaxpyDouble = { { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { {"VW",2}, {"WGS",128}, {"WPT",2} } }, + { "default", { {"VW",2}, {"WGS",128}, {"WPT",2} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"VW",1}, {"WGS",1024}, {"WPT",1} } }, @@ -175,6 +193,12 @@ const Database::DatabaseEntry Database::XaxpyComplexDouble = { { "default", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { {"VW",1}, {"WGS",64}, {"WPT",8} } }, + { "default", { {"VW",1}, {"WGS",64}, {"WPT",8} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"VW",8}, {"WGS",128}, {"WPT",1} } }, diff --git a/include/internal/database/xdot.h b/include/internal/database/xdot.h index 31d0f84b..b741e317 100644 --- a/include/internal/database/xdot.h +++ b/include/internal/database/xdot.h @@ -23,6 +23,12 @@ const Database::DatabaseEntry Database::XdotSingle = { { "default", { {"VW",1}, {"WGS1",64}, {"WGS2",128} } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { {"VW",1}, {"WGS1",128}, {"WGS2",256} } }, + { "default", { {"VW",1}, {"WGS1",128}, {"WGS2",256} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"VW",1}, {"WGS1",512}, {"WGS2",1024} } }, @@ -76,6 +82,12 @@ const Database::DatabaseEntry Database::XdotComplexSingle = { { "default", { {"VW",1}, {"WGS1",64}, {"WGS2",32} } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { {"VW",1}, {"WGS1",128}, {"WGS2",512} } }, + { "default", { {"VW",1}, {"WGS1",128}, {"WGS2",512} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"VW",1}, {"WGS1",256}, {"WGS2",1024} } }, @@ -129,6 +141,12 @@ const Database::DatabaseEntry Database::XdotDouble = { { "default", { {"VW",1}, {"WGS1",64}, {"WGS2",128} } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { {"VW",1}, {"WGS1",64}, {"WGS2",512} } }, + { "default", { {"VW",1}, {"WGS1",64}, {"WGS2",512} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"VW",1}, {"WGS1",512}, {"WGS2",512} } }, @@ -175,6 +193,12 @@ const Database::DatabaseEntry Database::XdotComplexDouble = { { "default", { {"VW",1}, {"WGS1",64}, {"WGS2",32} } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { {"VW",1}, {"WGS1",32}, {"WGS2",64} } }, + { "default", { {"VW",1}, {"WGS1",32}, {"WGS2",64} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"VW",1}, {"WGS1",256}, {"WGS2",1024} } }, diff --git a/include/internal/database/xgemm.h b/include/internal/database/xgemm.h index 833e2c7a..45efa397 100644 --- a/include/internal/database/xgemm.h +++ b/include/internal/database/xgemm.h @@ -23,6 +23,12 @@ const Database::DatabaseEntry Database::XgemmSingle = { { "default", { {"KWG",32}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",32}, {"MWG",128}, {"NDIMB",16}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",4}, {"VWN",1} } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",16}, {"NWG",16}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",1}, {"VWM",8}, {"VWN",1} } }, + { "default", { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",16}, {"NWG",16}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",1}, {"VWM",8}, {"VWN",1} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"KWG",32}, {"KWI",8}, {"MDIMA",32}, {"MDIMC",32}, {"MWG",64}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",2}, {"VWN",2} } }, @@ -76,6 +82,12 @@ const Database::DatabaseEntry Database::XgemmComplexSingle = { { "default", { {"KWG",32}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",32}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",1}, {"VWM",2}, {"VWN",8} } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",16}, {"MWG",128}, {"NDIMB",16}, {"NDIMC",8}, {"NWG",128}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",1}, {"VWM",8}, {"VWN",1} } }, + { "default", { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",16}, {"MWG",128}, {"NDIMB",16}, {"NDIMC",8}, {"NWG",128}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",1}, {"VWM",8}, {"VWN",1} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"KWG",32}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",16}, {"MWG",32}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",64}, {"SA",0}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",2} } }, @@ -129,6 +141,12 @@ const Database::DatabaseEntry Database::XgemmDouble = { { "default", { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",16}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",1}, {"VWM",1}, {"VWN",4} } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { {"KWG",32}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",16}, {"SA",0}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",8}, {"VWN",2} } }, + { "default", { {"KWG",32}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",16}, {"SA",0}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",8}, {"VWN",2} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"KWG",32}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",16}, {"MWG",128}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",64}, {"SA",0}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",2} } }, @@ -175,6 +193,12 @@ const Database::DatabaseEntry Database::XgemmComplexDouble = { { "default", { {"KWG",32}, {"KWI",8}, {"MDIMA",8}, {"MDIMC",16}, {"MWG",32}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",1}, {"VWM",2}, {"VWN",2} } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",32}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",8}, {"VWN",1} } }, + { "default", { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",32}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",8}, {"VWN",1} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",32}, {"MWG",128}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",64}, {"SA",0}, {"SB",1}, {"STRM",0}, {"STRN",0}, {"VWM",2}, {"VWN",4} } }, diff --git a/include/internal/database/xger.h b/include/internal/database/xger.h index 1f7730b1..ccc7a06f 100644 --- a/include/internal/database/xger.h +++ b/include/internal/database/xger.h @@ -22,6 +22,12 @@ const Database::DatabaseEntry Database::XgerSingle = { { "default", { {"WGS1",256}, {"WGS2",1}, {"WPT",1} } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { {"WGS1",64}, {"WGS2",4}, {"WPT",4} } }, + { "default", { {"WGS1",64}, {"WGS2",4}, {"WPT",4} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"WGS1",128}, {"WGS2",2}, {"WPT",4} } }, @@ -52,6 +58,12 @@ const Database::DatabaseEntry Database::XgerComplexSingle = { { "default", { {"WGS1",64}, {"WGS2",4}, {"WPT",1} } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { {"WGS1",128}, {"WGS2",1}, {"WPT",1} } }, + { "default", { {"WGS1",128}, {"WGS2",1}, {"WPT",1} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"WGS1",512}, {"WGS2",8}, {"WPT",2} } }, @@ -66,7 +78,7 @@ const Database::DatabaseEntry Database::XgerComplexSingle = { }, { // Default kDeviceTypeAll, "default", { - { "default", { {"WGS1",16}, {"WGS2",2}, {"WPT",1} } }, + { "default", { {"WGS1",16}, {"WGS2",1}, {"WPT",1} } }, } }, } @@ -82,6 +94,12 @@ const Database::DatabaseEntry Database::XgerDouble = { { "default", { {"WGS1",32}, {"WGS2",4}, {"WPT",1} } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { {"WGS1",64}, {"WGS2",4}, {"WPT",1} } }, + { "default", { {"WGS1",64}, {"WGS2",4}, {"WPT",1} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"WGS1",512}, {"WGS2",16}, {"WPT",1} } }, @@ -106,6 +124,12 @@ const Database::DatabaseEntry Database::XgerComplexDouble = { { "default", { {"WGS1",64}, {"WGS2",1}, {"WPT",1} } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { {"WGS1",64}, {"WGS2",2}, {"WPT",4} } }, + { "default", { {"WGS1",64}, {"WGS2",2}, {"WPT",4} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"WGS1",512}, {"WGS2",1}, {"WPT",1} } }, -- cgit v1.2.3