summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/internal/database.h2
-rw-r--r--include/internal/database/copy.h6
-rw-r--r--include/internal/database/pad.h6
-rw-r--r--include/internal/database/padtranspose.h6
-rw-r--r--include/internal/database/transpose.h6
-rw-r--r--include/internal/database/xaxpy.h6
-rw-r--r--include/internal/database/xgemv.h6
-rw-r--r--include/internal/database/xger.h18
-rw-r--r--src/database.cc2
9 files changed, 38 insertions, 20 deletions
diff --git a/include/internal/database.h b/include/internal/database.h
index b3788666..34629bf5 100644
--- a/include/internal/database.h
+++ b/include/internal/database.h
@@ -70,7 +70,7 @@ class Database {
static const DatabaseEntry XaxpyHalf, XaxpySingle, XaxpyDouble, XaxpyComplexSingle, XaxpyComplexDouble;
static const DatabaseEntry XdotHalf, XdotSingle, XdotDouble, XdotComplexSingle, XdotComplexDouble;
static const DatabaseEntry XgemvHalf, XgemvSingle, XgemvDouble, XgemvComplexSingle, XgemvComplexDouble;
- static const DatabaseEntry XgerSingle, XgerDouble, XgerComplexSingle, XgerComplexDouble;
+ static const DatabaseEntry XgerHalf, XgerSingle, XgerDouble, XgerComplexSingle, XgerComplexDouble;
static const DatabaseEntry XgemmSingle, XgemmDouble, XgemmComplexSingle, XgemmComplexDouble;
static const DatabaseEntry CopyHalf, CopySingle, CopyDouble, CopyComplexSingle, CopyComplexDouble;
static const DatabaseEntry PadHalf, PadSingle, PadDouble, PadComplexSingle, PadComplexDouble;
diff --git a/include/internal/database/copy.h b/include/internal/database/copy.h
index b4695868..63f8e814 100644
--- a/include/internal/database/copy.h
+++ b/include/internal/database/copy.h
@@ -18,13 +18,13 @@ const Database::DatabaseEntry Database::CopyHalf = {
"Copy", Precision::kHalf, {
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
- { "Intel(R) HD Graphics Skylake ULT GT2", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",4} } },
- { "default", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",4} } },
+ { "Intel(R) HD Graphics Skylake ULT GT2", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",1} } },
+ { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",1} } },
}
},
{ // Default
kDeviceTypeAll, "default", {
- { "default", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",4} } },
+ { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",1} } },
}
},
}
diff --git a/include/internal/database/pad.h b/include/internal/database/pad.h
index aae7631b..d0a85e7c 100644
--- a/include/internal/database/pad.h
+++ b/include/internal/database/pad.h
@@ -18,13 +18,13 @@ const Database::DatabaseEntry Database::PadHalf = {
"Pad", Precision::kHalf, {
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
- { "Intel(R) HD Graphics Skylake ULT GT2", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",4}, {"PAD_WPTY",4} } },
- { "default", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",4}, {"PAD_WPTY",4} } },
+ { "Intel(R) HD Graphics Skylake ULT GT2", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
+ { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
}
},
{ // Default
kDeviceTypeAll, "default", {
- { "default", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",4}, {"PAD_WPTY",4} } },
+ { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
}
},
}
diff --git a/include/internal/database/padtranspose.h b/include/internal/database/padtranspose.h
index 2d8d47f2..0eb3b528 100644
--- a/include/internal/database/padtranspose.h
+++ b/include/internal/database/padtranspose.h
@@ -18,13 +18,13 @@ const Database::DatabaseEntry Database::PadtransposeHalf = {
"Padtranspose", Precision::kHalf, {
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
- { "Intel(R) HD Graphics Skylake ULT GT2", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",2} } },
- { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",2} } },
+ { "Intel(R) HD Graphics Skylake ULT GT2", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",1} } },
+ { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",1} } },
}
},
{ // Default
kDeviceTypeAll, "default", {
- { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",2} } },
+ { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",1} } },
}
},
}
diff --git a/include/internal/database/transpose.h b/include/internal/database/transpose.h
index 8e5ace67..d7bdd90a 100644
--- a/include/internal/database/transpose.h
+++ b/include/internal/database/transpose.h
@@ -18,13 +18,13 @@ const Database::DatabaseEntry Database::TransposeHalf = {
"Transpose", Precision::kHalf, {
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
- { "Intel(R) HD Graphics Skylake ULT GT2", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",8} } },
- { "default", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",8} } },
+ { "Intel(R) HD Graphics Skylake ULT GT2", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } },
+ { "default", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } },
}
},
{ // Default
kDeviceTypeAll, "default", {
- { "default", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",8} } },
+ { "default", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } },
}
},
}
diff --git a/include/internal/database/xaxpy.h b/include/internal/database/xaxpy.h
index 6c5e478b..72e6a43c 100644
--- a/include/internal/database/xaxpy.h
+++ b/include/internal/database/xaxpy.h
@@ -18,13 +18,13 @@ const Database::DatabaseEntry Database::XaxpyHalf = {
"Xaxpy", Precision::kHalf, {
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
- { "Intel(R) HD Graphics Skylake ULT GT2", { {"VW",8}, {"WGS",512}, {"WPT",1} } },
- { "default", { {"VW",8}, {"WGS",512}, {"WPT",1} } },
+ { "Intel(R) HD Graphics Skylake ULT GT2", { {"VW",4}, {"WGS",512}, {"WPT",8} } },
+ { "default", { {"VW",4}, {"WGS",512}, {"WPT",8} } },
}
},
{ // Default
kDeviceTypeAll, "default", {
- { "default", { {"VW",8}, {"WGS",512}, {"WPT",1} } },
+ { "default", { {"VW",4}, {"WGS",512}, {"WPT",8} } },
}
},
}
diff --git a/include/internal/database/xgemv.h b/include/internal/database/xgemv.h
index c6e9d600..0d11f5ee 100644
--- a/include/internal/database/xgemv.h
+++ b/include/internal/database/xgemv.h
@@ -18,13 +18,13 @@ const Database::DatabaseEntry Database::XgemvHalf = {
"Xgemv", Precision::kHalf, {
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
- { "Intel(R) HD Graphics Skylake ULT GT2", { {"WGS1",256}, {"WPT1",1}, {"VW2",2}, {"WGS2",64}, {"WPT2",2}, {"VW3",4}, {"WGS3",64}, {"WPT3",4} } },
- { "default", { {"WGS1",256}, {"WPT1",1}, {"VW2",2}, {"WGS2",64}, {"WPT2",2}, {"VW3",4}, {"WGS3",64}, {"WPT3",4} } },
+ { "Intel(R) HD Graphics Skylake ULT GT2", { {"WGS1",128}, {"WPT1",1}, {"VW2",2}, {"WGS2",128}, {"WPT2",2}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
+ { "default", { {"WGS1",128}, {"WPT1",1}, {"VW2",2}, {"WGS2",128}, {"WPT2",2}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
}
},
{ // Default
kDeviceTypeAll, "default", {
- { "default", { {"WGS1",256}, {"WPT1",1}, {"VW2",2}, {"WGS2",64}, {"WPT2",2}, {"VW3",4}, {"WGS3",64}, {"WPT3",4} } },
+ { "default", { {"WGS1",128}, {"WPT1",1}, {"VW2",2}, {"WGS2",128}, {"WPT2",2}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
}
},
}
diff --git a/include/internal/database/xger.h b/include/internal/database/xger.h
index dae857cd..81b8b98a 100644
--- a/include/internal/database/xger.h
+++ b/include/internal/database/xger.h
@@ -14,6 +14,24 @@
namespace clblast {
// =================================================================================================
+const Database::DatabaseEntry Database::XgerHalf = {
+ "Xger", Precision::kHalf, {
+ { // Intel GPUs
+ kDeviceTypeGPU, "Intel", {
+ { "Intel(R) HD Graphics Skylake ULT GT2", { {"WGS1",64}, {"WGS2",1}, {"WPT",1} } },
+ { "default", { {"WGS1",64}, {"WGS2",1}, {"WPT",1} } },
+ }
+ },
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { {"WGS1",64}, {"WGS2",1}, {"WPT",1} } },
+ }
+ },
+ }
+};
+
+// =================================================================================================
+
const Database::DatabaseEntry Database::XgerSingle = {
"Xger", Precision::kSingle, {
{ // AMD GPUs
diff --git a/src/database.cc b/src/database.cc
index 138dbd2d..dc72dbdd 100644
--- a/src/database.cc
+++ b/src/database.cc
@@ -32,7 +32,7 @@ const std::vector<Database::DatabaseEntry> Database::database = {
XaxpyHalf, XaxpySingle, XaxpyDouble, XaxpyComplexSingle, XaxpyComplexDouble,
XdotHalf, XdotSingle, XdotDouble, XdotComplexSingle, XdotComplexDouble,
XgemvHalf, XgemvSingle, XgemvDouble, XgemvComplexSingle, XgemvComplexDouble,
- XgerSingle, XgerDouble, XgerComplexSingle, XgerComplexDouble,
+ XgerHalf, XgerSingle, XgerDouble, XgerComplexSingle, XgerComplexDouble,
XgemmSingle, XgemmDouble, XgemmComplexSingle, XgemmComplexDouble,
CopyHalf, CopySingle, CopyDouble, CopyComplexSingle, CopyComplexDouble,
PadHalf, PadSingle, PadDouble, PadComplexSingle, PadComplexDouble,