From ce369702d88a679d906677d9266a17cb72d78ff7 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Fri, 7 Apr 2017 07:34:32 +0200 Subject: Added some missing const-ness --- src/routines/level1/xaxpy.cpp | 10 +++++----- src/routines/level2/xgemv.cpp | 18 +++++++++--------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/routines/level1/xaxpy.cpp b/src/routines/level1/xaxpy.cpp index 39f61ef4..310562a0 100644 --- a/src/routines/level1/xaxpy.cpp +++ b/src/routines/level1/xaxpy.cpp @@ -44,12 +44,12 @@ void Xaxpy::DoAxpy(const size_t n, const T alpha, TestVectorY(n, y_buffer, y_offset, y_inc); // Determines whether or not the fast-version can be used - bool use_fast_kernel = (x_offset == 0) && (x_inc == 1) && - (y_offset == 0) && (y_inc == 1) && - IsMultiple(n, db_["WGS"]*db_["WPT"]*db_["VW"]); + const auto use_fast_kernel = (x_offset == 0) && (x_inc == 1) && + (y_offset == 0) && (y_inc == 1) && + IsMultiple(n, db_["WGS"]*db_["WPT"]*db_["VW"]); // If possible, run the fast-version of the kernel - auto kernel_name = (use_fast_kernel) ? "XaxpyFast" : "Xaxpy"; + const auto kernel_name = (use_fast_kernel) ? "XaxpyFast" : "Xaxpy"; // Retrieves the Xaxpy kernel from the compiled binary auto kernel = Kernel(program_, kernel_name); @@ -79,7 +79,7 @@ void Xaxpy::DoAxpy(const size_t n, const T alpha, RunKernel(kernel, queue_, device_, global, local, event_); } else { - auto n_ceiled = Ceil(n, db_["WGS"]*db_["WPT"]); + const auto n_ceiled = Ceil(n, db_["WGS"]*db_["WPT"]); auto global = std::vector{n_ceiled/db_["WPT"]}; auto local = std::vector{db_["WGS"]}; RunKernel(kernel, queue_, device_, global, local, event_); diff --git a/src/routines/level2/xgemv.cpp b/src/routines/level2/xgemv.cpp index 3b5b5e8b..b7e8081b 100644 --- a/src/routines/level2/xgemv.cpp +++ b/src/routines/level2/xgemv.cpp @@ -70,14 +70,14 @@ void Xgemv::MatVec(const Layout layout, const Transpose a_transpose, if (m == 0 || n == 0) { throw BLASError(StatusCode::kInvalidDimension); } // Computes whether or not the matrix has an alternative layout (row or column-major). - auto a_altlayout = (layout == Layout::kRowMajor); + const auto a_altlayout = (layout == Layout::kRowMajor); auto a_one = (a_altlayout) ? n : m; - auto a_two = (a_altlayout) ? m : n; + const auto a_two = (a_altlayout) ? m : n; // Swap m and n if the matrix is transposed - auto a_transposed = (a_transpose != Transpose::kNo); - auto m_real = (a_transposed) ? n : m; - auto n_real = (a_transposed) ? m : n; + const auto a_transposed = (a_transpose != Transpose::kNo); + const auto m_real = (a_transposed) ? n : m; + const auto n_real = (a_transposed) ? m : n; // Special adjustments for banded matrices if (kl != 0 || ku != 0) { @@ -85,10 +85,10 @@ void Xgemv::MatVec(const Layout layout, const Transpose a_transpose, } // Determines whether the kernel needs to perform rotated access ('^' is the XOR operator) - auto a_rotated = a_transposed ^ a_altlayout; + const auto a_rotated = a_transposed ^ a_altlayout; // In case of complex data-types, the transpose can also become a conjugate transpose - auto a_conjugate = (a_transpose == Transpose::kConjugate); + const auto a_conjugate = (a_transpose == Transpose::kConjugate); // Tests the matrix and the vectors for validity if (packed) { TestMatrixAP(n, a_buffer, a_offset); } @@ -107,8 +107,8 @@ void Xgemv::MatVec(const Layout layout, const Transpose a_transpose, IsMultiple(a_ld, db_["VW3"]); // If possible, run the fast-version (rotated or non-rotated) of the kernel - auto kernel_name = "Xgemv"; - auto m_ceiled = Ceil(m_real, db_["WGS1"]*db_["WPT1"]); + auto kernel_name = std::string{"Xgemv"}; + const auto m_ceiled = Ceil(m_real, db_["WGS1"]*db_["WPT1"]); auto global_size = m_ceiled / db_["WPT1"]; auto local_size = db_["WGS1"]; if (fast_kernel) { -- cgit v1.2.3 From d28ee082b0844086ee9b9cfb50825427f0b3a48a Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Fri, 7 Apr 2017 07:35:15 +0200 Subject: Uses float2 and double2 for base complex data-types instead of a custom struct; fixes bug on Apple OpenCL --- src/kernels/common.opencl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/kernels/common.opencl b/src/kernels/common.opencl index 32e3fbb9..db4c8ec4 100644 --- a/src/kernels/common.opencl +++ b/src/kernels/common.opencl @@ -69,7 +69,7 @@ R"( // Complex single-precision #elif PRECISION == 3232 - typedef struct cfloat {float x; float y;} real; + typedef float2 real; typedef struct cfloat2 {real x; real y;} real2; typedef struct cfloat4 {real x; real y; real z; real w;} real4; typedef struct cfloat8 {real s0; real s1; real s2; real s3; @@ -84,7 +84,7 @@ R"( // Complex double-precision #elif PRECISION == 6464 - typedef struct cdouble {double x; double y;} real; + typedef double2 real; typedef struct cdouble2 {real x; real y;} real2; typedef struct cdouble4 {real x; real y; real z; real w;} real4; typedef struct cdouble8 {real s0; real s1; real s2; real s3; -- cgit v1.2.3 From fb6c78ea070fbfc7d2d38a3c0d77a8219cef6f04 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Fri, 7 Apr 2017 07:37:30 +0200 Subject: Added a special override database for the Apple CPU implementation on OS X: this makes the test work, it does not focus on good performance --- include/clblast.h | 2 +- src/clpp11.hpp | 15 ++++++++ src/database/apple_cpu_fallback.hpp | 70 +++++++++++++++++++++++++++++++++++++ src/database/database.cpp | 33 ++++++++++++++--- src/database/database.hpp | 3 ++ src/utilities/utilities.cpp | 2 ++ 6 files changed, 120 insertions(+), 5 deletions(-) create mode 100644 src/database/apple_cpu_fallback.hpp diff --git a/include/clblast.h b/include/clblast.h index 2520d601..54944ea2 100644 --- a/include/clblast.h +++ b/include/clblast.h @@ -119,7 +119,7 @@ enum class Side { kLeft = 141, kRight = 142 }; // Precision scoped enum (values in bits) enum class Precision { kHalf = 16, kSingle = 32, kDouble = 64, - kComplexSingle = 3232, kComplexDouble = 6464 }; + kComplexSingle = 3232, kComplexDouble = 6464, kAny = -1 }; // ================================================================================================= // BLAS level-1 (vector-vector) routines diff --git a/src/clpp11.hpp b/src/clpp11.hpp index 29f81cf8..e0b8cbe9 100644 --- a/src/clpp11.hpp +++ b/src/clpp11.hpp @@ -164,6 +164,10 @@ class Platform { platform_ = platforms[platform_id]; } + // Methods to retrieve platform information + std::string Name() const { return GetInfoString(CL_PLATFORM_NAME); } + std::string Vendor() const { return GetInfoString(CL_PLATFORM_VENDOR); } + // Returns the number of devices on this platform size_t NumDevices() const { auto result = cl_uint{0}; @@ -175,6 +179,17 @@ class Platform { const cl_platform_id& operator()() const { return platform_; } private: cl_platform_id platform_; + + // Private helper functions + std::string GetInfoString(const cl_device_info info) const { + auto bytes = size_t{0}; + CheckError(clGetPlatformInfo(platform_, info, 0, nullptr, &bytes)); + auto result = std::string{}; + result.resize(bytes); + CheckError(clGetPlatformInfo(platform_, info, bytes, &result[0], nullptr)); + result.resize(strlen(result.c_str())); // Removes any trailing '\0'-characters + return result; + } }; // Retrieves a vector with all platforms diff --git a/src/database/apple_cpu_fallback.hpp b/src/database/apple_cpu_fallback.hpp new file mode 100644 index 00000000..89ac8f71 --- /dev/null +++ b/src/database/apple_cpu_fallback.hpp @@ -0,0 +1,70 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file provides overrides for Apple's OpenCL CPU implementation. It is a special case compared +// to all other implementations, as it only supports a 1-dimensional work-group size. In addition, +// that work-group size is limited to 1024 (in theory) or much lower (kernel resource dependent). +// Thus, instead of supporting this corner-case in the whole regular flow (starting from the tuner), +// we provide this file with some manual overrides. +// +// Note: These overrides are to make the Apple CPU work and not crash, they are not in any way +// optimized parameters. For decent speed don't use Apple's OpenCL CPU implementation. +// +// ================================================================================================= + +namespace clblast { +namespace database { +// ================================================================================================= + +const Database::DatabaseEntry XaxpyApple = { + "Xaxpy", Precision::kAny, { { kDeviceTypeAll, "default", { { "default", { {"VW",8}, {"WGS",1}, {"WPT",4} } } } } } +}; +const Database::DatabaseEntry XdotApple = { + "Xdot", Precision::kAny, { { kDeviceTypeAll, "default", { { "default", { {"WGS1",1}, {"WGS2",1} } } } } } +}; +const Database::DatabaseEntry XgemvApple = { + "Xgemv", Precision::kAny, { { kDeviceTypeAll, "default", { { "default", { {"WGS1",1}, {"WPT1",4}, {"UNROLL1", 1} } } } } } +}; +const Database::DatabaseEntry XgemvFastApple = { + "XgemvFast", Precision::kAny, { { kDeviceTypeAll, "default", { { "default", { {"VW2",1}, {"WGS2",1}, {"WPT2",1} } } } } } +}; +const Database::DatabaseEntry XgemvFastRotApple = { + "XgemvFastRot", Precision::kAny, { { kDeviceTypeAll, "default", { { "default", { {"VW3",1}, {"WGS3",1}, {"WPT3",1} } } } } } +}; +const Database::DatabaseEntry XgerApple = { + "Xger", Precision::kAny, { { kDeviceTypeAll, "default", { { "default", { {"WGS1",64}, {"WGS2",1}, {"WPT",2} } } } } } +}; +const Database::DatabaseEntry XtrsvApple = { + "Xtrsv", Precision::kAny, { { kDeviceTypeAll, "default", { { "default", { {"TRSV_BLOCK_SIZE",32} } } } } } +}; +const Database::DatabaseEntry XgemmApple = { + "Xgemm", Precision::kAny, { { kDeviceTypeAll, "default", { { "default", { {"KWG",1}, {"KWI",1}, {"MDIMA",1}, {"MDIMC",1}, {"MWG",1}, {"NDIMB",1}, {"NDIMC",1}, {"NWG",1}, {"SA",1}, {"SB",1}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } } } } } +}; +const Database::DatabaseEntry XgemmDirectApple = { + "XgemmDirect", Precision::kAny, { { kDeviceTypeAll, "default", { { "default", { {"KWID",1}, {"MDIMAD",1}, {"MDIMCD",1}, {"NDIMBD",1}, {"NDIMCD",1}, {"PADA",0}, {"PADB",0}, {"VWMD",1}, {"VWND",1}, {"WGD",1} } } } } } +}; +const Database::DatabaseEntry CopyApple = { + "Copy", Precision::kAny, { { kDeviceTypeAll, "default", { { "default", { {"COPY_DIMX",1}, {"COPY_DIMY",1}, {"COPY_VW",1}, {"COPY_WPT",1} } } } } } +}; +const Database::DatabaseEntry PadApple = { + "Pad", Precision::kAny, { { kDeviceTypeAll, "default", { { "default", { {"PAD_DIMX",1}, {"PAD_DIMY",1}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } } } } } +}; +const Database::DatabaseEntry TransposeApple = { + "Transpose", Precision::kAny, { { kDeviceTypeAll, "default", { { "default", { {"TRA_DIM",1}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } } } } } +}; +const Database::DatabaseEntry PadtransposeApple = { + "Padtranspose", Precision::kAny, { { kDeviceTypeAll, "default", { { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",1}, {"PADTRA_WPT",1} } } } } } +}; +const Database::DatabaseEntry InvertApple = { + "Invert", Precision::kAny, { { kDeviceTypeAll, "default", { { "default", { {"INTERNAL_BLOCK_SIZE",16} } } } } } +}; + +// ================================================================================================= +} // namespace database +} // namespace clblast diff --git a/src/database/database.cpp b/src/database/database.cpp index f1d1dc66..fe026577 100644 --- a/src/database/database.cpp +++ b/src/database/database.cpp @@ -11,6 +11,8 @@ // // ================================================================================================= +#include + #include "utilities/utilities.hpp" #include "database/database.hpp" @@ -28,12 +30,13 @@ #include "database/kernels/transpose.hpp" #include "database/kernels/padtranspose.hpp" #include "database/kernels/invert.hpp" +#include "database/apple_cpu_fallback.hpp" #include "database/kernel_selection.hpp" namespace clblast { // ================================================================================================= -// Initializes the database +// Initializes the databases const std::vector Database::database = { &database::XaxpyHalf, &database::XaxpySingle, &database::XaxpyDouble, &database::XaxpyComplexSingle, &database::XaxpyComplexDouble, &database::XdotHalf, &database::XdotSingle, &database::XdotDouble, &database::XdotComplexSingle, &database::XdotComplexDouble, @@ -51,8 +54,15 @@ const std::vector Database::database = { &database::InvertHalf, &database::InvertSingle, &database::InvertDouble, &database::InvertComplexSingle, &database::InvertComplexDouble, &database::KernelSelectionHalf, &database::KernelSelectionSingle, &database::KernelSelectionDouble, &database::KernelSelectionComplexSingle, &database::KernelSelectionComplexDouble }; +const std::vector Database::apple_cpu_fallback = { + &database::XaxpyApple, &database::XdotApple, + &database::XgemvApple, &database::XgemvFastApple, &database::XgemvFastRotApple, &database::XgerApple, &database::XtrsvApple, + &database::XgemmApple, &database::XgemmDirectApple, + &database::CopyApple, &database::PadApple, &database::TransposeApple, &database::PadtransposeApple, + &database::InvertApple +}; -// The OpenCL device vendors +// The default values const std::string Database::kDeviceVendorAll = "default"; // Alternative names for some OpenCL vendors @@ -83,9 +93,23 @@ Database::Database(const Device &device, const std::string &kernel_name, } } + // Sets the databases to search through + auto databases = std::list>{overlay, database}; + + // Special case: modifies the database if the device is a CPU with Apple OpenCL + #if defined(__APPLE__) || defined(__MACOSX) + if (device.Type() == "CPU") { + auto extensions = device.Capabilities(); + const auto is_apple = (extensions.find("cl_APPLE_SetMemObjectDestructor") == std::string::npos) ? false : true; + if (is_apple) { + databases.push_front(apple_cpu_fallback); + } + } + #endif + // Searches potentially multiple databases auto search_result = ParametersPtr{}; - for (auto &db: { overlay, database}) { + for (auto &db: databases) { search_result = Search(kernel_name, device_type, device_vendor, device_name, precision, db); if (search_result) { parameters_->insert(search_result->begin(), search_result->end()); @@ -128,7 +152,8 @@ Database::ParametersPtr Database::Search(const std::string &this_kernel, // Selects the right kernel for (auto &db: this_database) { - if (db->kernel == this_kernel && db->precision == this_precision) { + if ((db->kernel == this_kernel) && + (db->precision == this_precision || db->precision == Precision::kAny)) { // Searches for the right vendor and device type, or selects the default if unavailable. This // assumes that the default vendor / device type is last in the database. diff --git a/src/database/database.hpp b/src/database/database.hpp index b34e0d8a..e49146c5 100644 --- a/src/database/database.hpp +++ b/src/database/database.hpp @@ -72,6 +72,9 @@ class Database { // The database consists of separate database entries, stored together in a vector static const std::vector database; + // Database for a special case: Apple CPUs support limited number of threads + static const std::vector apple_cpu_fallback; + Database() = default; // The constructor with a user-provided database overlay (potentially an empty vector) diff --git a/src/utilities/utilities.cpp b/src/utilities/utilities.cpp index 0f2661ad..95b70cd5 100644 --- a/src/utilities/utilities.cpp +++ b/src/utilities/utilities.cpp @@ -176,6 +176,7 @@ std::string ToString(Precision value) { case Precision::kDouble: return ToString(static_cast(value))+" (double)"; case Precision::kComplexSingle: return ToString(static_cast(value))+" (complex-single)"; case Precision::kComplexDouble: return ToString(static_cast(value))+" (complex-double)"; + case Precision::kAny: return ToString(static_cast(value))+" (any)"; } } template <> @@ -467,6 +468,7 @@ size_t GetBytes(const Precision precision) { case Precision::kDouble: return 8; case Precision::kComplexSingle: return 8; case Precision::kComplexDouble: return 16; + case Precision::kAny: return -1; } } -- cgit v1.2.3 From 300531b869ca266c22d4580761872a4ebb6a244b Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Mon, 10 Apr 2017 07:21:34 +0200 Subject: Updated the changelog with the Apple CPU override --- CHANGELOG | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG b/CHANGELOG index 1455cf19..0b4e9951 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -4,6 +4,7 @@ Development version (next release) - Fixed a bug having to re-create the binary even if it was in the cache - Fixed a bug when using offsets in the direct version of the GEMM kernels - Fixed a missing cl_khr_fp64 when running double-precision on Intel CPUs +- Fixed tests on Apple's CPU OpenCL implementation; still not fast but correct at least - Fixed bugs in the half-precision routines HTBMV/HTPMV/HTRMV/HSYR2K/HTRMM - Tests now also exit with an error code when OpenCL errors or compilation errors occur - Tests now also check for the L2 error in case of half-precision -- cgit v1.2.3 From 2d45c37676d551f53095f7ffa2f178105f5930b2 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Mon, 10 Apr 2017 07:40:27 +0200 Subject: Removed const-vector-of-const-objects from the database class to remain according to the C++11 standard --- scripts/generator/generator.py | 2 +- src/clblast.cpp | 3 ++- src/database/database.cpp | 56 +++++++++++++++++++++--------------------- src/database/database.hpp | 8 +++--- src/routine.cpp | 4 +-- src/routine.hpp | 4 +-- 6 files changed, 39 insertions(+), 38 deletions(-) diff --git a/scripts/generator/generator.py b/scripts/generator/generator.py index 086b27d3..6ec67052 100755 --- a/scripts/generator/generator.py +++ b/scripts/generator/generator.py @@ -42,7 +42,7 @@ FILES = [ "/src/clblast_netlib_c.cpp", ] HEADER_LINES = [123, 76, 126, 23, 29, 41, 65, 32] -FOOTER_LINES = [25, 138, 27, 38, 6, 6, 9, 2] +FOOTER_LINES = [25, 139, 27, 38, 6, 6, 9, 2] HEADER_LINES_DOC = 0 FOOTER_LINES_DOC = 63 diff --git a/src/clblast.cpp b/src/clblast.cpp index a8bcf91d..78548eba 100644 --- a/src/clblast.cpp +++ b/src/clblast.cpp @@ -2441,7 +2441,8 @@ StatusCode OverrideParameters(const cl_device_id device, const std::string &kern const auto database_device = Database::DatabaseDevice{"default", parameters}; const auto database_vendor = Database::DatabaseVendor{database::kDeviceTypeAll, "default", {database_device}}; const auto database_entry = Database::DatabaseEntry{kernel_name, precision, {database_vendor}}; - const auto database = Database(device_cpp, kernel_name, precision, {&database_entry}); + const auto database_entries = std::vector{database_entry}; + const auto database = Database(device_cpp, kernel_name, precision, database_entries); // Removes the old database entry and stores the new one in the cache DatabaseCache::Instance().Remove(DatabaseKey{ precision, device_name, kernel_name }); diff --git a/src/database/database.cpp b/src/database/database.cpp index fe026577..a1cc3570 100644 --- a/src/database/database.cpp +++ b/src/database/database.cpp @@ -37,29 +37,29 @@ namespace clblast { // ================================================================================================= // Initializes the databases -const std::vector Database::database = { - &database::XaxpyHalf, &database::XaxpySingle, &database::XaxpyDouble, &database::XaxpyComplexSingle, &database::XaxpyComplexDouble, - &database::XdotHalf, &database::XdotSingle, &database::XdotDouble, &database::XdotComplexSingle, &database::XdotComplexDouble, - &database::XgemvHalf, &database::XgemvSingle, &database::XgemvDouble, &database::XgemvComplexSingle, &database::XgemvComplexDouble, - &database::XgemvFastHalf, &database::XgemvFastSingle, &database::XgemvFastDouble, &database::XgemvFastComplexSingle, &database::XgemvFastComplexDouble, - &database::XgemvFastRotHalf, &database::XgemvFastRotSingle, &database::XgemvFastRotDouble, &database::XgemvFastRotComplexSingle, &database::XgemvFastRotComplexDouble, - &database::XgerHalf, &database::XgerSingle, &database::XgerDouble, &database::XgerComplexSingle, &database::XgerComplexDouble, - &database::XtrsvHalf, &database::XtrsvSingle, &database::XtrsvDouble, &database::XtrsvComplexSingle, &database::XtrsvComplexDouble, - &database::XgemmHalf, &database::XgemmSingle, &database::XgemmDouble, &database::XgemmComplexSingle, &database::XgemmComplexDouble, - &database::XgemmDirectHalf, &database::XgemmDirectSingle, &database::XgemmDirectDouble, &database::XgemmDirectComplexSingle, &database::XgemmDirectComplexDouble, - &database::CopyHalf, &database::CopySingle, &database::CopyDouble, &database::CopyComplexSingle, &database::CopyComplexDouble, - &database::PadHalf, &database::PadSingle, &database::PadDouble, &database::PadComplexSingle, &database::PadComplexDouble, - &database::TransposeHalf, &database::TransposeSingle, &database::TransposeDouble, &database::TransposeComplexSingle, &database::TransposeComplexDouble, - &database::PadtransposeHalf, &database::PadtransposeSingle, &database::PadtransposeDouble, &database::PadtransposeComplexSingle, &database::PadtransposeComplexDouble, - &database::InvertHalf, &database::InvertSingle, &database::InvertDouble, &database::InvertComplexSingle, &database::InvertComplexDouble, - &database::KernelSelectionHalf, &database::KernelSelectionSingle, &database::KernelSelectionDouble, &database::KernelSelectionComplexSingle, &database::KernelSelectionComplexDouble +const auto Database::database = std::vector{ + database::XaxpyHalf, database::XaxpySingle, database::XaxpyDouble, database::XaxpyComplexSingle, database::XaxpyComplexDouble, + database::XdotHalf, database::XdotSingle, database::XdotDouble, database::XdotComplexSingle, database::XdotComplexDouble, + database::XgemvHalf, database::XgemvSingle, database::XgemvDouble, database::XgemvComplexSingle, database::XgemvComplexDouble, + database::XgemvFastHalf, database::XgemvFastSingle, database::XgemvFastDouble, database::XgemvFastComplexSingle, database::XgemvFastComplexDouble, + database::XgemvFastRotHalf, database::XgemvFastRotSingle, database::XgemvFastRotDouble, database::XgemvFastRotComplexSingle, database::XgemvFastRotComplexDouble, + database::XgerHalf, database::XgerSingle, database::XgerDouble, database::XgerComplexSingle, database::XgerComplexDouble, + database::XtrsvHalf, database::XtrsvSingle, database::XtrsvDouble, database::XtrsvComplexSingle, database::XtrsvComplexDouble, + database::XgemmHalf, database::XgemmSingle, database::XgemmDouble, database::XgemmComplexSingle, database::XgemmComplexDouble, + database::XgemmDirectHalf, database::XgemmDirectSingle, database::XgemmDirectDouble, database::XgemmDirectComplexSingle, database::XgemmDirectComplexDouble, + database::CopyHalf, database::CopySingle, database::CopyDouble, database::CopyComplexSingle, database::CopyComplexDouble, + database::PadHalf, database::PadSingle, database::PadDouble, database::PadComplexSingle, database::PadComplexDouble, + database::TransposeHalf, database::TransposeSingle, database::TransposeDouble, database::TransposeComplexSingle, database::TransposeComplexDouble, + database::PadtransposeHalf, database::PadtransposeSingle, database::PadtransposeDouble, database::PadtransposeComplexSingle, database::PadtransposeComplexDouble, + database::InvertHalf, database::InvertSingle, database::InvertDouble, database::InvertComplexSingle, database::InvertComplexDouble, + database::KernelSelectionHalf, database::KernelSelectionSingle, database::KernelSelectionDouble, database::KernelSelectionComplexSingle, database::KernelSelectionComplexDouble }; -const std::vector Database::apple_cpu_fallback = { - &database::XaxpyApple, &database::XdotApple, - &database::XgemvApple, &database::XgemvFastApple, &database::XgemvFastRotApple, &database::XgerApple, &database::XtrsvApple, - &database::XgemmApple, &database::XgemmDirectApple, - &database::CopyApple, &database::PadApple, &database::TransposeApple, &database::PadtransposeApple, - &database::InvertApple +const auto Database::apple_cpu_fallback = std::vector{ + database::XaxpyApple, database::XdotApple, + database::XgemvApple, database::XgemvFastApple, database::XgemvFastRotApple, database::XgerApple, database::XtrsvApple, + database::XgemmApple, database::XgemmDirectApple, + database::CopyApple, database::PadApple, database::TransposeApple, database::PadtransposeApple, + database::InvertApple }; // The default values @@ -78,7 +78,7 @@ const std::unordered_map Database::kVendorNames{ // Constructor, computing device properties and populating the parameter-vector from the database. // This takes an optional overlay database in case of custom tuning or custom kernels. Database::Database(const Device &device, const std::string &kernel_name, - const Precision precision, const std::vector &overlay): + const Precision precision, const std::vector &overlay): parameters_(std::make_shared()) { // Finds information of the current device @@ -94,7 +94,7 @@ Database::Database(const Device &device, const std::string &kernel_name, } // Sets the databases to search through - auto databases = std::list>{overlay, database}; + auto databases = std::list>{overlay, database}; // Special case: modifies the database if the device is a CPU with Apple OpenCL #if defined(__APPLE__) || defined(__MACOSX) @@ -148,16 +148,16 @@ Database::ParametersPtr Database::Search(const std::string &this_kernel, const std::string &this_vendor, const std::string &this_device, const Precision this_precision, - const std::vector &this_database) const { + const std::vector &this_database) const { // Selects the right kernel for (auto &db: this_database) { - if ((db->kernel == this_kernel) && - (db->precision == this_precision || db->precision == Precision::kAny)) { + if ((db.kernel == this_kernel) && + (db.precision == this_precision || db.precision == Precision::kAny)) { // Searches for the right vendor and device type, or selects the default if unavailable. This // assumes that the default vendor / device type is last in the database. - for (auto &vendor: db->vendors) { + for (auto &vendor: db.vendors) { if ((vendor.name == this_vendor || vendor.name == kDeviceVendorAll) && (vendor.type == this_type || vendor.type == database::kDeviceTypeAll)) { diff --git a/src/database/database.hpp b/src/database/database.hpp index e49146c5..82fbc252 100644 --- a/src/database/database.hpp +++ b/src/database/database.hpp @@ -70,16 +70,16 @@ class Database { static const std::unordered_map kVendorNames; // The database consists of separate database entries, stored together in a vector - static const std::vector database; + static const std::vector database; // Database for a special case: Apple CPUs support limited number of threads - static const std::vector apple_cpu_fallback; + static const std::vector apple_cpu_fallback; Database() = default; // The constructor with a user-provided database overlay (potentially an empty vector) explicit Database(const Device &device, const std::string &kernel_name, - const Precision precision, const std::vector &overlay); + const Precision precision, const std::vector &overlay); // Accessor of values by key size_t operator[](const std::string &key) const { return parameters_->find(key)->second; } @@ -96,7 +96,7 @@ class Database { ParametersPtr Search(const std::string &this_kernel, const std::string &this_type, const std::string &this_vendor, const std::string &this_device, const Precision this_precision, - const std::vector &db) const; + const std::vector &db) const; // Found parameters suitable for this device/kernel std::shared_ptr parameters_; diff --git a/src/routine.cpp b/src/routine.cpp index b5823bc9..cb39c7ee 100644 --- a/src/routine.cpp +++ b/src/routine.cpp @@ -51,7 +51,7 @@ const std::unordered_map> Routine::r // The constructor does all heavy work, errors are returned as exceptions Routine::Routine(Queue &queue, EventPointer event, const std::string &name, const std::vector &kernel_names, const Precision precision, - const std::vector &userDatabase, + const std::vector &userDatabase, std::initializer_list source): precision_(precision), routine_name_(name), @@ -67,7 +67,7 @@ Routine::Routine(Queue &queue, EventPointer event, const std::string &name, InitProgram(source); } -void Routine::InitDatabase(const std::vector &userDatabase) { +void Routine::InitDatabase(const std::vector &userDatabase) { for (const auto &kernel_name : kernel_names_) { // Queries the cache to see whether or not the kernel parameter database is already there diff --git a/src/routine.hpp b/src/routine.hpp index eb11b566..903ccdb1 100644 --- a/src/routine.hpp +++ b/src/routine.hpp @@ -40,7 +40,7 @@ class Routine { // and routine list, otherwise the caching logic will break. explicit Routine(Queue &queue, EventPointer event, const std::string &name, const std::vector &routines, const Precision precision, - const std::vector &userDatabase, + const std::vector &userDatabase, std::initializer_list source); // List of kernel-routine look-ups @@ -59,7 +59,7 @@ class Routine { void InitProgram(std::initializer_list source); // Initializes db_, fetching cached database or building one - void InitDatabase(const std::vector &userDatabase); + void InitDatabase(const std::vector &userDatabase); protected: -- cgit v1.2.3 From 7374c37e2e11d404dd9b330ffa50e49853078677 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Mon, 10 Apr 2017 08:38:24 +0200 Subject: Fixed a compilation issue under MSVC and GCC --- src/database/database.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/database/database.cpp b/src/database/database.cpp index a1cc3570..404be804 100644 --- a/src/database/database.cpp +++ b/src/database/database.cpp @@ -37,7 +37,7 @@ namespace clblast { // ================================================================================================= // Initializes the databases -const auto Database::database = std::vector{ +const std::vector Database::database = std::vector{ database::XaxpyHalf, database::XaxpySingle, database::XaxpyDouble, database::XaxpyComplexSingle, database::XaxpyComplexDouble, database::XdotHalf, database::XdotSingle, database::XdotDouble, database::XdotComplexSingle, database::XdotComplexDouble, database::XgemvHalf, database::XgemvSingle, database::XgemvDouble, database::XgemvComplexSingle, database::XgemvComplexDouble, @@ -54,7 +54,7 @@ const auto Database::database = std::vector{ database::InvertHalf, database::InvertSingle, database::InvertDouble, database::InvertComplexSingle, database::InvertComplexDouble, database::KernelSelectionHalf, database::KernelSelectionSingle, database::KernelSelectionDouble, database::KernelSelectionComplexSingle, database::KernelSelectionComplexDouble }; -const auto Database::apple_cpu_fallback = std::vector{ +const std::vector Database::apple_cpu_fallback = std::vector{ database::XaxpyApple, database::XdotApple, database::XgemvApple, database::XgemvFastApple, database::XgemvFastRotApple, database::XgerApple, database::XtrsvApple, database::XgemmApple, database::XgemmDirectApple, -- cgit v1.2.3