diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/clpp11.hpp | 29 | ||||
-rw-r--r-- | src/database/database.cpp | 70 | ||||
-rw-r--r-- | src/database/database.hpp | 21 | ||||
-rw-r--r-- | src/database/database_structure.hpp | 6 | ||||
-rw-r--r-- | src/database/device_mapping.hpp | 51 | ||||
-rw-r--r-- | src/utilities/utilities.cpp | 9 | ||||
-rw-r--r-- | src/utilities/utilities.hpp | 2 |
7 files changed, 135 insertions, 53 deletions
diff --git a/src/clpp11.hpp b/src/clpp11.hpp index e0b8cbe9..9d6929a7 100644 --- a/src/clpp11.hpp +++ b/src/clpp11.hpp @@ -261,6 +261,11 @@ class Device { return static_cast<unsigned long>(GetInfo<cl_ulong>(CL_DEVICE_LOCAL_MEM_SIZE)); } std::string Capabilities() const { return GetInfoString(CL_DEVICE_EXTENSIONS); } + bool HasExtension(const std::string &extension) const { + const auto extensions = Capabilities(); + return extensions.find(extension) != std::string::npos; + } + size_t CoreClock() const { return static_cast<size_t>(GetInfo<cl_uint>(CL_DEVICE_MAX_CLOCK_FREQUENCY)); } @@ -294,13 +299,27 @@ class Device { // Query for a specific type of device or brand bool IsCPU() const { return Type() == "CPU"; } bool IsGPU() const { return Type() == "GPU"; } - bool IsAMD() const { return Vendor() == "AMD" || Vendor() == "Advanced Micro Devices, Inc." || - Vendor() == "AuthenticAMD";; } - bool IsNVIDIA() const { return Vendor() == "NVIDIA" || Vendor() == "NVIDIA Corporation"; } - bool IsIntel() const { return Vendor() == "INTEL" || Vendor() == "Intel" || - Vendor() == "GenuineIntel"; } + bool IsAMD() const { return Vendor() == "AMD" || + Vendor() == "Advanced Micro Devices, Inc." 
|| + Vendor() == "AuthenticAMD"; } + bool IsNVIDIA() const { return Vendor() == "NVIDIA" || + Vendor() == "NVIDIA Corporation"; } + bool IsIntel() const { return Vendor() == "INTEL" || + Vendor() == "Intel" || + Vendor() == "GenuineIntel" || + Vendor() == "Intel(R) Corporation"; } bool IsARM() const { return Vendor() == "ARM"; } + // Platform specific extensions + std::string AMDBoardName() const { // check for 'cl_amd_device_attribute_query' first + return GetInfoString(CL_DEVICE_BOARD_NAME_AMD); + } + std::string NVIDIAComputeCapability() const { // check for 'cl_nv_device_attribute_query' first + return std::string{"SM"} + GetInfoString(CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV) + + std::string{"."} + GetInfoString(CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV); + } + + // Accessor to the private data-member const cl_device_id& operator()() const { return device_; } private: diff --git a/src/database/database.cpp b/src/database/database.cpp index c0eeaa2f..d2add8c9 100644 --- a/src/database/database.cpp +++ b/src/database/database.cpp @@ -69,14 +69,6 @@ const std::vector<database::DatabaseEntry> Database::apple_cpu_fallback = std::v // The default values const std::string Database::kDeviceVendorAll = "default"; -// Alternative names for some OpenCL vendors -const std::unordered_map<std::string, std::string> Database::kVendorNames{ - { "Intel(R) Corporation", "Intel" }, - { "GenuineIntel", "Intel" }, - { "Advanced Micro Devices, Inc.", "AMD" }, - { "NVIDIA Corporation", "NVIDIA" }, -}; - // ================================================================================================= // Constructor, computing device properties and populating the parameter-vector from the database. 
@@ -85,25 +77,45 @@ Database::Database(const Device &device, const std::string &kernel_name, const Precision precision, const std::vector<database::DatabaseEntry> &overlay): parameters_(std::make_shared<database::Parameters>()) { - // Finds information of the current device + // Finds top-level information (vendor and type) auto device_type = device.Type(); auto device_vendor = device.Vendor(); - auto device_name = device.Name(); + for (auto &find_and_replace : database::kVendorNames) { // replacing to common names + if (device_vendor == find_and_replace.first) { device_vendor = find_and_replace.second; } + } - // Set the short vendor name - for (auto &combination : kVendorNames) { - if (device_vendor == combination.first) { - device_vendor = combination.second; - } + // Finds mid-level information (architecture) + auto device_architecture = std::string{""}; + if (device.HasExtension(kKhronosAttributesNVIDIA)) { + device_architecture = device.NVIDIAComputeCapability(); + } + else if (device.HasExtension(kKhronosAttributesAMD)) { + device_architecture = device.Name(); // Name is architecture for AMD APP and AMD ROCm + } + // Note: no else - 'device_architecture' might be the empty string + for (auto &find_and_replace : database::kArchitectureNames) { // replacing to common names + if (device_architecture == find_and_replace.first) { device_architecture = find_and_replace.second; } + } + + // Finds low-level information (device name) + auto device_name = std::string{""}; + if (device.HasExtension(kKhronosAttributesAMD)) { + device_name = device.AMDBoardName(); + } + else { + device_name = device.Name(); + } + for (auto &find_and_replace : database::kDeviceNames) { // replacing to common names + if (device_name == find_and_replace.first) { device_name = find_and_replace.second; } } // Sets the databases to search through - auto databases = std::list<std::vector<database::DatabaseEntry>>{overlay, database}; + const auto databases = 
std::list<std::vector<database::DatabaseEntry>>{overlay, database}; // Special case: modifies the database if the device is a CPU with Apple OpenCL #if defined(__APPLE__) || defined(__MACOSX) if (device.Type() == "CPU") { - auto extensions = device.Capabilities(); + const auto extensions = device.Capabilities(); const auto is_apple = (extensions.find("cl_APPLE_SetMemObjectDestructor") == std::string::npos) ? false : true; if (is_apple) { databases.push_front(apple_cpu_fallback); @@ -114,7 +126,8 @@ Database::Database(const Device &device, const std::string &kernel_name, // Searches potentially multiple databases auto search_result = database::Parameters(); for (auto &db: databases) { - search_result = Search(kernel_name, device_type, device_vendor, device_name, precision, db); + search_result = Search(kernel_name, device_vendor, device_type, + device_name, device_architecture, precision, db); if (search_result.size() != 0) { parameters_->insert(search_result.begin(), search_result.end()); break; @@ -148,9 +161,8 @@ std::vector<std::string> Database::GetParameterNames() const { // Searches a particular database for the right kernel and precision database::Parameters Database::Search(const std::string &this_kernel, - const std::string &this_type, - const std::string &this_vendor, - const std::string &this_device, + const std::string &this_vendor, const std::string &this_type, + const std::string &this_device, const std::string &this_architecture, const Precision this_precision, const std::vector<database::DatabaseEntry> &this_database) const { @@ -160,10 +172,10 @@ database::Parameters Database::Search(const std::string &this_kernel, (db.precision == this_precision || db.precision == Precision::kAny)) { // Searches for the right vendor and device type, or selects the default if unavailable - const auto parameters = SearchVendorAndType(this_vendor, this_type, this_device, + const auto parameters = SearchVendorAndType(this_vendor, this_type, this_device, 
this_architecture, db.vendors, db.parameter_names); if (parameters.size() != 0) { return parameters; } - return SearchVendorAndType(kDeviceVendorAll, database::kDeviceTypeAll, this_device, + return SearchVendorAndType(kDeviceVendorAll, database::kDeviceTypeAll, this_device, this_architecture, db.vendors, db.parameter_names); } } @@ -172,16 +184,18 @@ database::Parameters Database::Search(const std::string &this_kernel, return database::Parameters(); } -database::Parameters Database::SearchVendorAndType(const std::string &target_vendor, - const std::string &target_type, - const std::string &this_device, +database::Parameters Database::SearchVendorAndType(const std::string &target_vendor, const std::string &target_type, + const std::string &this_device, const std::string &this_architecture, const std::vector<database::DatabaseVendor> &vendors, const std::vector<std::string> &parameter_names) const { for (auto &vendor: vendors) { if ((vendor.name == target_vendor) && (vendor.type == target_type)) { - // Searches the device; if unavailable, returns the vendor's default parameters - const auto parameters = SearchDevice(this_device, vendor.devices, parameter_names); + // Searches the device; if unavailable, searches the architecture; if unavailable returns the + // vendor's default parameters + auto parameters = SearchDevice(this_device, vendor.devices, parameter_names); + if (parameters.size() != 0) { return parameters; } + parameters = SearchDevice(this_architecture, vendor.devices, parameter_names); if (parameters.size() != 0) { return parameters; } return SearchDevice("default", vendor.devices, parameter_names); } diff --git a/src/database/database.hpp b/src/database/database.hpp index 66cf93d5..e7a79cf9 100644 --- a/src/database/database.hpp +++ b/src/database/database.hpp @@ -23,6 +23,7 @@ #include "utilities/utilities.hpp" #include "database/database_structure.hpp" +#include "database/device_mapping.hpp" namespace clblast { // 
================================================================================================= @@ -34,9 +35,6 @@ class Database { // The OpenCL device vendors static const std::string kDeviceVendorAll; - // Alternative names for some OpenCL vendors - static const std::unordered_map<std::string, std::string> kVendorNames; - // The database consists of separate database entries, stored together in a vector static const std::vector<database::DatabaseEntry> database; @@ -61,18 +59,19 @@ class Database { private: // Search method functions, returning a set of parameters (possibly empty) - database::Parameters Search(const std::string &this_kernel, const std::string &this_type, - const std::string &this_vendor, const std::string &this_device, - const Precision this_precision, - const std::vector<database::DatabaseEntry> &db) const; + database::Parameters Search(const std::string &this_kernel, + const std::string &this_vendor, const std::string &this_type, + const std::string &this_device, const std::string &this_architecture, + const Precision this_precision, + const std::vector<database::DatabaseEntry> &db) const; database::Parameters SearchDevice(const std::string &target_device, const std::vector<database::DatabaseDevice> &devices, const std::vector<std::string> &parameter_names) const; database::Parameters SearchVendorAndType(const std::string &target_vendor, - const std::string &target_type, - const std::string &this_device, - const std::vector<database::DatabaseVendor> &vendors, - const std::vector<std::string> &parameter_names) const; + const std::string &target_type, + const std::string &this_device, const std::string &this_architecture, + const std::vector<database::DatabaseVendor> &vendors, + const std::vector<std::string> &parameter_names) const; // Found parameters suitable for this device/kernel std::shared_ptr<database::Parameters> parameters_; diff --git a/src/database/database_structure.hpp b/src/database/database_structure.hpp index 961ab239..4d7f967d 100644 --- 
a/src/database/database_structure.hpp +++ b/src/database/database_structure.hpp @@ -11,8 +11,8 @@ // // ================================================================================================= -#ifndef CLBLAST_DATABASE_STRUCTURE_H_ -#define CLBLAST_DATABASE_STRUCTURE_H_ +#ifndef CLBLAST_DATABASE_DATABASE_STRUCTURE_H_ +#define CLBLAST_DATABASE_DATABASE_STRUCTURE_H_ #include <string> #include <vector> @@ -54,5 +54,5 @@ struct DatabaseEntry { } // namespace database } // namespace clblast -// CLBLAST_DATABASE_STRUCTURE_H_ +// CLBLAST_DATABASE_DATABASE_STRUCTURE_H_ #endif diff --git a/src/database/device_mapping.hpp b/src/database/device_mapping.hpp new file mode 100644 index 00000000..9fb5d81d --- /dev/null +++ b/src/database/device_mapping.hpp @@ -0,0 +1,51 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file describes the mappings of extracted names from OpenCL (device, board, vendor, etc.) to +// more commonly used names to match devices from different vendors and platforms properly. 
+// +// ================================================================================================= + +#ifndef CLBLAST_DATABASE_DEVICE_MAPPING_H_ +#define CLBLAST_DATABASE_DEVICE_MAPPING_H_ + +#include <string> +#include <unordered_map> + +namespace clblast { +// A special namespace to hold all the global constant variables (including the device mapping) +namespace database { + +// ================================================================================================= + +// Alternative names for some vendor names (top-level) +const std::unordered_map<std::string, std::string> kVendorNames { + { "Intel(R) Corporation", "Intel" }, + { "GenuineIntel", "Intel" }, + { "Advanced Micro Devices, Inc.", "AMD" }, + { "NVIDIA Corporation", "NVIDIA" }, +}; + +// Alternative names for some architectures (mid-level) +const std::unordered_map<std::string, std::string> kArchitectureNames { + {"gfx803", "Fiji"}, + {"gfx900", "Vega"}, +}; + +// Alternative names for some devices (low-level) +const std::unordered_map<std::string, std::string> kDeviceNames { + // Empty +}; + +// ================================================================================================= +} // namespace database +} // namespace clblast + +// CLBLAST_DATABASE_DEVICE_MAPPING_H_ +#endif diff --git a/src/utilities/utilities.cpp b/src/utilities/utilities.cpp index 0cd00438..4ff419a5 100644 --- a/src/utilities/utilities.cpp +++ b/src/utilities/utilities.cpp @@ -390,17 +390,14 @@ template <> Precision PrecisionValue<double2>() { return Precision::kComplexDoub template <> bool PrecisionSupported<float>(const Device &) { return true; } template <> bool PrecisionSupported<float2>(const Device &) { return true; } template <> bool PrecisionSupported<double>(const Device &device) { - auto extensions = device.Capabilities(); - return (extensions.find(kKhronosDoublePrecision) == std::string::npos) ? 
false : true; + return device.HasExtension(kKhronosDoublePrecision); } template <> bool PrecisionSupported<double2>(const Device &device) { - auto extensions = device.Capabilities(); - return (extensions.find(kKhronosDoublePrecision) == std::string::npos) ? false : true; + return device.HasExtension(kKhronosDoublePrecision); } template <> bool PrecisionSupported<half>(const Device &device) { - auto extensions = device.Capabilities(); if (device.Name() == "Mali-T628") { return true; } // supports fp16 but not cl_khr_fp16 officially - return (extensions.find(kKhronosHalfPrecision) == std::string::npos) ? false : true; + return device.HasExtension(kKhronosHalfPrecision); } // ================================================================================================= diff --git a/src/utilities/utilities.hpp b/src/utilities/utilities.hpp index fae69b63..3ffb3be5 100644 --- a/src/utilities/utilities.hpp +++ b/src/utilities/utilities.hpp @@ -40,6 +40,8 @@ using double2 = std::complex<double>; // Khronos OpenCL extensions const std::string kKhronosHalfPrecision = "cl_khr_fp16"; const std::string kKhronosDoublePrecision = "cl_khr_fp64"; +const std::string kKhronosAttributesAMD = "cl_amd_device_attribute_query"; +const std::string kKhronosAttributesNVIDIA = "cl_nv_device_attribute_query"; // Catched an unknown error constexpr auto kUnknownError = -999; |