summaryrefslogtreecommitdiff
path: root/src/database
diff options
context:
space:
mode:
Diffstat (limited to 'src/database')
-rw-r--r--src/database/apple_cpu_fallback.hpp70
-rw-r--r--src/database/database.cpp33
-rw-r--r--src/database/database.hpp3
3 files changed, 102 insertions, 4 deletions
diff --git a/src/database/apple_cpu_fallback.hpp b/src/database/apple_cpu_fallback.hpp
new file mode 100644
index 00000000..89ac8f71
--- /dev/null
+++ b/src/database/apple_cpu_fallback.hpp
@@ -0,0 +1,70 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file provides overrides for Apple's OpenCL CPU implementation. It is a special case compared
+// to all other implementations, as it only supports a 1-dimensional work-group size. In addition,
+// that work-group size is limited to 1024 (in theory) or much lower (kernel resource dependent).
+// Thus, instead of supporting this corner-case in the whole regular flow (starting from the tuner),
+// we provide this file with some manual overrides.
+//
+// Note: These overrides exist only to make the Apple CPU work without crashing; they are not in any
+// way optimized parameters. For decent speed, don't use Apple's OpenCL CPU implementation.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+// =================================================================================================
+
+const Database::DatabaseEntry XaxpyApple = { // "Xaxpy" overrides; kAny matches every precision
+ "Xaxpy", Precision::kAny, { { kDeviceTypeAll, "default", { { "default", { {"VW",8}, {"WGS",1}, {"WPT",4} } } } } }
+};
+const Database::DatabaseEntry XdotApple = { // Overrides for the "Xdot" kernel
+ "Xdot", Precision::kAny, { { kDeviceTypeAll, "default", { { "default", { {"WGS1",1}, {"WGS2",1} } } } } }
+};
+const Database::DatabaseEntry XgemvApple = { // Overrides for the "Xgemv" kernel
+ "Xgemv", Precision::kAny, { { kDeviceTypeAll, "default", { { "default", { {"WGS1",1}, {"WPT1",4}, {"UNROLL1", 1} } } } } }
+};
+const Database::DatabaseEntry XgemvFastApple = { // Overrides for the "XgemvFast" kernel
+ "XgemvFast", Precision::kAny, { { kDeviceTypeAll, "default", { { "default", { {"VW2",1}, {"WGS2",1}, {"WPT2",1} } } } } }
+};
+const Database::DatabaseEntry XgemvFastRotApple = { // Overrides for the "XgemvFastRot" kernel
+ "XgemvFastRot", Precision::kAny, { { kDeviceTypeAll, "default", { { "default", { {"VW3",1}, {"WGS3",1}, {"WPT3",1} } } } } }
+};
+const Database::DatabaseEntry XgerApple = { // "Xger" overrides; only WGS* above 1 - TODO confirm
+ "Xger", Precision::kAny, { { kDeviceTypeAll, "default", { { "default", { {"WGS1",64}, {"WGS2",1}, {"WPT",2} } } } } }
+};
+const Database::DatabaseEntry XtrsvApple = { // Overrides for the "Xtrsv" kernel
+ "Xtrsv", Precision::kAny, { { kDeviceTypeAll, "default", { { "default", { {"TRSV_BLOCK_SIZE",32} } } } } }
+};
+const Database::DatabaseEntry XgemmApple = { // Overrides for the "Xgemm" kernel
+ "Xgemm", Precision::kAny, { { kDeviceTypeAll, "default", { { "default", { {"KWG",1}, {"KWI",1}, {"MDIMA",1}, {"MDIMC",1}, {"MWG",1}, {"NDIMB",1}, {"NDIMC",1}, {"NWG",1}, {"SA",1}, {"SB",1}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } } } } }
+};
+const Database::DatabaseEntry XgemmDirectApple = { // Overrides for the "XgemmDirect" kernel
+ "XgemmDirect", Precision::kAny, { { kDeviceTypeAll, "default", { { "default", { {"KWID",1}, {"MDIMAD",1}, {"MDIMCD",1}, {"NDIMBD",1}, {"NDIMCD",1}, {"PADA",0}, {"PADB",0}, {"VWMD",1}, {"VWND",1}, {"WGD",1} } } } } }
+};
+const Database::DatabaseEntry CopyApple = { // Overrides for the "Copy" kernel
+ "Copy", Precision::kAny, { { kDeviceTypeAll, "default", { { "default", { {"COPY_DIMX",1}, {"COPY_DIMY",1}, {"COPY_VW",1}, {"COPY_WPT",1} } } } } }
+};
+const Database::DatabaseEntry PadApple = { // Overrides for the "Pad" kernel
+ "Pad", Precision::kAny, { { kDeviceTypeAll, "default", { { "default", { {"PAD_DIMX",1}, {"PAD_DIMY",1}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } } } } }
+};
+const Database::DatabaseEntry TransposeApple = { // Overrides for the "Transpose" kernel
+ "Transpose", Precision::kAny, { { kDeviceTypeAll, "default", { { "default", { {"TRA_DIM",1}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } } } } }
+};
+const Database::DatabaseEntry PadtransposeApple = { // Overrides for the "Padtranspose" kernel
+ "Padtranspose", Precision::kAny, { { kDeviceTypeAll, "default", { { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",1}, {"PADTRA_WPT",1} } } } } }
+};
+const Database::DatabaseEntry InvertApple = { // Overrides for the "Invert" kernel
+ "Invert", Precision::kAny, { { kDeviceTypeAll, "default", { { "default", { {"INTERNAL_BLOCK_SIZE",16} } } } } }
+};
+
+// =================================================================================================
+} // namespace database
+} // namespace clblast
diff --git a/src/database/database.cpp b/src/database/database.cpp
index f1d1dc66..fe026577 100644
--- a/src/database/database.cpp
+++ b/src/database/database.cpp
@@ -11,6 +11,8 @@
//
// =================================================================================================
+#include <list>
+
#include "utilities/utilities.hpp"
#include "database/database.hpp"
@@ -28,12 +30,13 @@
#include "database/kernels/transpose.hpp"
#include "database/kernels/padtranspose.hpp"
#include "database/kernels/invert.hpp"
+#include "database/apple_cpu_fallback.hpp"
#include "database/kernel_selection.hpp"
namespace clblast {
// =================================================================================================
-// Initializes the database
+// Initializes the databases
const std::vector<const Database::DatabaseEntry*> Database::database = {
&database::XaxpyHalf, &database::XaxpySingle, &database::XaxpyDouble, &database::XaxpyComplexSingle, &database::XaxpyComplexDouble,
&database::XdotHalf, &database::XdotSingle, &database::XdotDouble, &database::XdotComplexSingle, &database::XdotComplexDouble,
@@ -51,8 +54,15 @@ const std::vector<const Database::DatabaseEntry*> Database::database = {
&database::InvertHalf, &database::InvertSingle, &database::InvertDouble, &database::InvertComplexSingle, &database::InvertComplexDouble,
&database::KernelSelectionHalf, &database::KernelSelectionSingle, &database::KernelSelectionDouble, &database::KernelSelectionComplexSingle, &database::KernelSelectionComplexDouble
};
+const std::vector<const Database::DatabaseEntry*> Database::apple_cpu_fallback = {
+ &database::XaxpyApple, &database::XdotApple,
+ &database::XgemvApple, &database::XgemvFastApple, &database::XgemvFastRotApple, &database::XgerApple, &database::XtrsvApple,
+ &database::XgemmApple, &database::XgemmDirectApple,
+ &database::CopyApple, &database::PadApple, &database::TransposeApple, &database::PadtransposeApple,
+ &database::InvertApple
+}; // Prepended to the list of searched databases for Apple OpenCL CPU devices
-// The OpenCL device vendors
+// The default values
const std::string Database::kDeviceVendorAll = "default";
// Alternative names for some OpenCL vendors
@@ -83,9 +93,23 @@ Database::Database(const Device &device, const std::string &kernel_name,
}
}
+ // Sets the databases to search through
+ auto databases = std::list<const std::vector<const DatabaseEntry*>>{overlay, database};
+
+ // Special case: prepends the fallback database if the device is a CPU with Apple OpenCL
+ #if defined(__APPLE__) || defined(__MACOSX)
+ if (device.Type() == "CPU") {
+ auto extensions = device.Capabilities();
+ const auto is_apple = (extensions.find("cl_APPLE_SetMemObjectDestructor") == std::string::npos) ? false : true;
+ if (is_apple) {
+ databases.push_front(apple_cpu_fallback);
+ }
+ }
+ #endif
+
// Searches potentially multiple databases
auto search_result = ParametersPtr{};
- for (auto &db: { overlay, database}) {
+ for (auto &db: databases) {
search_result = Search(kernel_name, device_type, device_vendor, device_name, precision, db);
if (search_result) {
parameters_->insert(search_result->begin(), search_result->end());
@@ -128,7 +152,8 @@ Database::ParametersPtr Database::Search(const std::string &this_kernel,
// Selects the right kernel
for (auto &db: this_database) {
- if (db->kernel == this_kernel && db->precision == this_precision) {
+ if ((db->kernel == this_kernel) &&
+ (db->precision == this_precision || db->precision == Precision::kAny)) {
// Searches for the right vendor and device type, or selects the default if unavailable. This
// assumes that the default vendor / device type is last in the database.
diff --git a/src/database/database.hpp b/src/database/database.hpp
index b34e0d8a..e49146c5 100644
--- a/src/database/database.hpp
+++ b/src/database/database.hpp
@@ -72,6 +72,9 @@ class Database {
// The database consists of separate database entries, stored together in a vector
static const std::vector<const DatabaseEntry*> database;
+ // Database for a special case: Apple CPUs support a limited number of threads
+ static const std::vector<const DatabaseEntry*> apple_cpu_fallback;
+
Database() = default;
// The constructor with a user-provided database overlay (potentially an empty vector)