diff options
-rw-r--r-- | include/internal/database.h | 2 | ||||
-rw-r--r-- | include/internal/database/copy.h | 6 | ||||
-rw-r--r-- | include/internal/database/pad.h | 6 | ||||
-rw-r--r-- | include/internal/database/padtranspose.h | 6 | ||||
-rw-r--r-- | include/internal/database/transpose.h | 6 | ||||
-rw-r--r-- | include/internal/database/xaxpy.h | 6 | ||||
-rw-r--r-- | include/internal/database/xgemv.h | 6 | ||||
-rw-r--r-- | include/internal/database/xger.h | 18 | ||||
-rw-r--r-- | src/database.cc | 2 |
9 files changed, 38 insertions, 20 deletions
diff --git a/include/internal/database.h b/include/internal/database.h index b3788666..34629bf5 100644 --- a/include/internal/database.h +++ b/include/internal/database.h @@ -70,7 +70,7 @@ class Database { static const DatabaseEntry XaxpyHalf, XaxpySingle, XaxpyDouble, XaxpyComplexSingle, XaxpyComplexDouble; static const DatabaseEntry XdotHalf, XdotSingle, XdotDouble, XdotComplexSingle, XdotComplexDouble; static const DatabaseEntry XgemvHalf, XgemvSingle, XgemvDouble, XgemvComplexSingle, XgemvComplexDouble; - static const DatabaseEntry XgerSingle, XgerDouble, XgerComplexSingle, XgerComplexDouble; + static const DatabaseEntry XgerHalf, XgerSingle, XgerDouble, XgerComplexSingle, XgerComplexDouble; static const DatabaseEntry XgemmSingle, XgemmDouble, XgemmComplexSingle, XgemmComplexDouble; static const DatabaseEntry CopyHalf, CopySingle, CopyDouble, CopyComplexSingle, CopyComplexDouble; static const DatabaseEntry PadHalf, PadSingle, PadDouble, PadComplexSingle, PadComplexDouble; diff --git a/include/internal/database/copy.h b/include/internal/database/copy.h index b4695868..63f8e814 100644 --- a/include/internal/database/copy.h +++ b/include/internal/database/copy.h @@ -18,13 +18,13 @@ const Database::DatabaseEntry Database::CopyHalf = { "Copy", Precision::kHalf, { { // Intel GPUs kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics Skylake ULT GT2", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",4} } }, - { "default", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",4} } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",1} } }, + { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",1} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",4} } }, + { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",1} } }, } }, } diff --git a/include/internal/database/pad.h b/include/internal/database/pad.h index aae7631b..d0a85e7c 100644 --- a/include/internal/database/pad.h +++ b/include/internal/database/pad.h @@ -18,13 +18,13 @@ const Database::DatabaseEntry Database::PadHalf = { "Pad", Precision::kHalf, { { // Intel GPUs kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics Skylake ULT GT2", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",4}, {"PAD_WPTY",4} } }, - { "default", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",4}, {"PAD_WPTY",4} } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",4}, {"PAD_WPTY",4} } }, + { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, } }, } diff --git a/include/internal/database/padtranspose.h b/include/internal/database/padtranspose.h index 2d8d47f2..0eb3b528 100644 --- a/include/internal/database/padtranspose.h +++ b/include/internal/database/padtranspose.h @@ -18,13 +18,13 @@ const Database::DatabaseEntry Database::PadtransposeHalf = { "Padtranspose", Precision::kHalf, { { // Intel GPUs kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics Skylake ULT GT2", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",2} } }, - { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",2} } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",1} } }, + { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",1} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",2} } }, + { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",1} } }, } }, } diff --git a/include/internal/database/transpose.h b/include/internal/database/transpose.h index 8e5ace67..d7bdd90a 100644 --- a/include/internal/database/transpose.h +++ b/include/internal/database/transpose.h @@ -18,13 +18,13 @@ const Database::DatabaseEntry Database::TransposeHalf = { "Transpose", Precision::kHalf, { { // Intel GPUs kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics Skylake ULT GT2", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",8} } }, - { "default", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",8} } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, + { "default", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",8} } }, + { "default", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, } }, } diff --git a/include/internal/database/xaxpy.h b/include/internal/database/xaxpy.h index 6c5e478b..72e6a43c 100644 --- a/include/internal/database/xaxpy.h +++ b/include/internal/database/xaxpy.h @@ -18,13 +18,13 @@ const Database::DatabaseEntry Database::XaxpyHalf = { "Xaxpy", Precision::kHalf, { { // Intel GPUs kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics Skylake ULT GT2", { {"VW",8}, {"WGS",512}, {"WPT",1} } }, - { "default", { {"VW",8}, {"WGS",512}, {"WPT",1} } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { {"VW",4}, {"WGS",512}, {"WPT",8} } }, + { "default", { {"VW",4}, {"WGS",512}, {"WPT",8} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"VW",8}, {"WGS",512}, {"WPT",1} } }, + { "default", { {"VW",4}, {"WGS",512}, {"WPT",8} } }, } }, } diff --git a/include/internal/database/xgemv.h b/include/internal/database/xgemv.h index c6e9d600..0d11f5ee 100644 --- a/include/internal/database/xgemv.h +++ b/include/internal/database/xgemv.h @@ -18,13 +18,13 @@ const Database::DatabaseEntry Database::XgemvHalf = { "Xgemv", Precision::kHalf, { { // Intel GPUs kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics Skylake ULT GT2", { {"WGS1",256}, {"WPT1",1}, {"VW2",2}, {"WGS2",64}, {"WPT2",2}, {"VW3",4}, {"WGS3",64}, {"WPT3",4} } }, - { "default", { {"WGS1",256}, {"WPT1",1}, {"VW2",2}, {"WGS2",64}, {"WPT2",2}, {"VW3",4}, {"WGS3",64}, {"WPT3",4} } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { {"WGS1",128}, {"WPT1",1}, {"VW2",2}, {"WGS2",128}, {"WPT2",2}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, + { "default", { {"WGS1",128}, {"WPT1",1}, {"VW2",2}, {"WGS2",128}, {"WPT2",2}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"WGS1",256}, {"WPT1",1}, {"VW2",2}, {"WGS2",64}, {"WPT2",2}, {"VW3",4}, {"WGS3",64}, {"WPT3",4} } }, + { "default", { {"WGS1",128}, {"WPT1",1}, {"VW2",2}, {"WGS2",128}, {"WPT2",2}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, } }, } diff --git a/include/internal/database/xger.h b/include/internal/database/xger.h index dae857cd..81b8b98a 100644 --- a/include/internal/database/xger.h +++ b/include/internal/database/xger.h @@ -14,6 +14,24 @@ namespace clblast { // ================================================================================================= +const Database::DatabaseEntry Database::XgerHalf = { + "Xger", Precision::kHalf, { + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Intel(R) HD Graphics Skylake ULT GT2", { {"WGS1",64}, {"WGS2",1}, {"WPT",1} } }, + { "default", { {"WGS1",64}, {"WGS2",1}, {"WPT",1} } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { {"WGS1",64}, {"WGS2",1}, {"WPT",1} } }, + } + }, + } +}; + +// ================================================================================================= + const Database::DatabaseEntry Database::XgerSingle = { "Xger", Precision::kSingle, { { // AMD GPUs diff --git a/src/database.cc b/src/database.cc index 138dbd2d..dc72dbdd 100644 --- a/src/database.cc +++ b/src/database.cc @@ -32,7 +32,7 @@ const std::vector<Database::DatabaseEntry> Database::database = { XaxpyHalf, XaxpySingle, XaxpyDouble, XaxpyComplexSingle, XaxpyComplexDouble, XdotHalf, XdotSingle, XdotDouble, XdotComplexSingle, XdotComplexDouble, XgemvHalf, XgemvSingle, XgemvDouble, XgemvComplexSingle, XgemvComplexDouble, - XgerSingle, XgerDouble, XgerComplexSingle, XgerComplexDouble, + XgerHalf, XgerSingle, XgerDouble, XgerComplexSingle, XgerComplexDouble, XgemmSingle, XgemmDouble, XgemmComplexSingle, XgemmComplexDouble, CopyHalf, CopySingle, CopyDouble, CopyComplexSingle, CopyComplexDouble, PadHalf, PadSingle, PadDouble, PadComplexSingle, PadComplexDouble, |