summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorCedric Nugteren <web@cedricnugteren.nl>2017-04-10 20:09:40 +0200
committerGitHub <noreply@github.com>2017-04-10 20:09:40 +0200
commit0da1e380974007f69b827f6b10ef0243249d0c5e (patch)
treec817ab1a5dd4cb79f9f917e5399659ddea5fb2ee /src
parentea0aeadc34cdc3e352aea537d5265dd447afa1f6 (diff)
parent7374c37e2e11d404dd9b330ffa50e49853078677 (diff)
Merge pull request #145 from CNugteren/apple_cpu_support
Patch to make tests complete on Apple's CPU implementation
Diffstat (limited to 'src')
-rw-r--r--src/clblast.cpp3
-rw-r--r--src/clpp11.hpp15
-rw-r--r--src/database/apple_cpu_fallback.hpp70
-rw-r--r--src/database/database.cpp71
-rw-r--r--src/database/database.hpp9
-rw-r--r--src/kernels/common.opencl4
-rw-r--r--src/routine.cpp4
-rw-r--r--src/routine.hpp4
-rw-r--r--src/routines/level1/xaxpy.cpp10
-rw-r--r--src/routines/level2/xgemv.cpp18
-rw-r--r--src/utilities/utilities.cpp2
11 files changed, 163 insertions, 47 deletions
diff --git a/src/clblast.cpp b/src/clblast.cpp
index a8bcf91d..78548eba 100644
--- a/src/clblast.cpp
+++ b/src/clblast.cpp
@@ -2441,7 +2441,8 @@ StatusCode OverrideParameters(const cl_device_id device, const std::string &kern
const auto database_device = Database::DatabaseDevice{"default", parameters};
const auto database_vendor = Database::DatabaseVendor{database::kDeviceTypeAll, "default", {database_device}};
const auto database_entry = Database::DatabaseEntry{kernel_name, precision, {database_vendor}};
- const auto database = Database(device_cpp, kernel_name, precision, {&database_entry});
+ const auto database_entries = std::vector<Database::DatabaseEntry>{database_entry};
+ const auto database = Database(device_cpp, kernel_name, precision, database_entries);
// Removes the old database entry and stores the new one in the cache
DatabaseCache::Instance().Remove(DatabaseKey{ precision, device_name, kernel_name });
diff --git a/src/clpp11.hpp b/src/clpp11.hpp
index 29f81cf8..e0b8cbe9 100644
--- a/src/clpp11.hpp
+++ b/src/clpp11.hpp
@@ -164,6 +164,10 @@ class Platform {
platform_ = platforms[platform_id];
}
+ // Methods to retrieve platform information. Each one forwards a CL_PLATFORM_* query to the
+ // private GetInfoString() helper and returns the resulting string by value.
+ std::string Name() const { return GetInfoString(CL_PLATFORM_NAME); }
+ std::string Vendor() const { return GetInfoString(CL_PLATFORM_VENDOR); }
+
// Returns the number of devices on this platform
size_t NumDevices() const {
auto result = cl_uint{0};
@@ -175,6 +179,17 @@ class Platform {
const cl_platform_id& operator()() const { return platform_; }
private:
cl_platform_id platform_;
+
+ // Private helper functions
+  // Queries a string-valued platform property through clGetPlatformInfo. The first call only asks
+  // for the required size in bytes, the second call fills a string of exactly that size. The
+  // status code of either call is passed to CheckError.
+  //   info:    the CL_PLATFORM_* property to query (e.g. CL_PLATFORM_NAME, CL_PLATFORM_VENDOR);
+  //            typed as cl_platform_info to match the clGetPlatformInfo signature
+  //   returns: the property value, cut off at the first '\0'-character
+  std::string GetInfoString(const cl_platform_info info) const {
+    auto bytes = size_t{0};
+    CheckError(clGetPlatformInfo(platform_, info, 0, nullptr, &bytes));
+    auto result = std::string{};
+    result.resize(bytes);
+    CheckError(clGetPlatformInfo(platform_, info, bytes, &result[0], nullptr));
+    result.resize(strlen(result.c_str())); // Removes any trailing '\0'-characters
+    return result;
+  }
};
// Retrieves a vector with all platforms
diff --git a/src/database/apple_cpu_fallback.hpp b/src/database/apple_cpu_fallback.hpp
new file mode 100644
index 00000000..89ac8f71
--- /dev/null
+++ b/src/database/apple_cpu_fallback.hpp
@@ -0,0 +1,70 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file provides overrides for Apple's OpenCL CPU implementation. It is a special case compared
+// to all other implementations, as it only supports a 1-dimensional work-group size. In addition,
+// that work-group size is limited to 1024 (in theory) or much lower (kernel resource dependent).
+// Thus, instead of supporting this corner-case in the whole regular flow (starting from the tuner),
+// we provide this file with some manual overrides.
+//
+// Note: These overrides only make the Apple CPU work without crashing; they are not in any way
+// optimized parameters. For decent speed don't use Apple's OpenCL CPU implementation.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+// =================================================================================================
+
+// Note: this file has no includes of its own, so Database::DatabaseEntry, Precision and
+// kDeviceTypeAll have to be declared already at the point where this file is included.
+//
+// Every entry below is keyed by kernel name and has the same shape: the precision is
+// Precision::kAny, followed by a single vendor record {kDeviceTypeAll, "default"} holding a single
+// device record named "default" with the parameter name/value pairs for that kernel.
+const Database::DatabaseEntry XaxpyApple = {
+ "Xaxpy", Precision::kAny, { { kDeviceTypeAll, "default", { { "default", { {"VW",8}, {"WGS",1}, {"WPT",4} } } } } }
+};
+const Database::DatabaseEntry XdotApple = {
+ "Xdot", Precision::kAny, { { kDeviceTypeAll, "default", { { "default", { {"WGS1",1}, {"WGS2",1} } } } } }
+};
+const Database::DatabaseEntry XgemvApple = {
+ "Xgemv", Precision::kAny, { { kDeviceTypeAll, "default", { { "default", { {"WGS1",1}, {"WPT1",4}, {"UNROLL1", 1} } } } } }
+};
+const Database::DatabaseEntry XgemvFastApple = {
+ "XgemvFast", Precision::kAny, { { kDeviceTypeAll, "default", { { "default", { {"VW2",1}, {"WGS2",1}, {"WPT2",1} } } } } }
+};
+const Database::DatabaseEntry XgemvFastRotApple = {
+ "XgemvFastRot", Precision::kAny, { { kDeviceTypeAll, "default", { { "default", { {"VW3",1}, {"WGS3",1}, {"WPT3",1} } } } } }
+};
+const Database::DatabaseEntry XgerApple = {
+ "Xger", Precision::kAny, { { kDeviceTypeAll, "default", { { "default", { {"WGS1",64}, {"WGS2",1}, {"WPT",2} } } } } }
+};
+const Database::DatabaseEntry XtrsvApple = {
+ "Xtrsv", Precision::kAny, { { kDeviceTypeAll, "default", { { "default", { {"TRSV_BLOCK_SIZE",32} } } } } }
+};
+const Database::DatabaseEntry XgemmApple = {
+ "Xgemm", Precision::kAny, { { kDeviceTypeAll, "default", { { "default", { {"KWG",1}, {"KWI",1}, {"MDIMA",1}, {"MDIMC",1}, {"MWG",1}, {"NDIMB",1}, {"NDIMC",1}, {"NWG",1}, {"SA",1}, {"SB",1}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } } } } }
+};
+const Database::DatabaseEntry XgemmDirectApple = {
+ "XgemmDirect", Precision::kAny, { { kDeviceTypeAll, "default", { { "default", { {"KWID",1}, {"MDIMAD",1}, {"MDIMCD",1}, {"NDIMBD",1}, {"NDIMCD",1}, {"PADA",0}, {"PADB",0}, {"VWMD",1}, {"VWND",1}, {"WGD",1} } } } } }
+};
+const Database::DatabaseEntry CopyApple = {
+ "Copy", Precision::kAny, { { kDeviceTypeAll, "default", { { "default", { {"COPY_DIMX",1}, {"COPY_DIMY",1}, {"COPY_VW",1}, {"COPY_WPT",1} } } } } }
+};
+const Database::DatabaseEntry PadApple = {
+ "Pad", Precision::kAny, { { kDeviceTypeAll, "default", { { "default", { {"PAD_DIMX",1}, {"PAD_DIMY",1}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } } } } }
+};
+const Database::DatabaseEntry TransposeApple = {
+ "Transpose", Precision::kAny, { { kDeviceTypeAll, "default", { { "default", { {"TRA_DIM",1}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } } } } }
+};
+const Database::DatabaseEntry PadtransposeApple = {
+ "Padtranspose", Precision::kAny, { { kDeviceTypeAll, "default", { { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",1}, {"PADTRA_WPT",1} } } } } }
+};
+const Database::DatabaseEntry InvertApple = {
+ "Invert", Precision::kAny, { { kDeviceTypeAll, "default", { { "default", { {"INTERNAL_BLOCK_SIZE",16} } } } } }
+};
+
+// =================================================================================================
+} // namespace database
+} // namespace clblast
diff --git a/src/database/database.cpp b/src/database/database.cpp
index f1d1dc66..404be804 100644
--- a/src/database/database.cpp
+++ b/src/database/database.cpp
@@ -11,6 +11,8 @@
//
// =================================================================================================
+#include <list>
+
#include "utilities/utilities.hpp"
#include "database/database.hpp"
@@ -28,31 +30,39 @@
#include "database/kernels/transpose.hpp"
#include "database/kernels/padtranspose.hpp"
#include "database/kernels/invert.hpp"
+#include "database/apple_cpu_fallback.hpp"
#include "database/kernel_selection.hpp"
namespace clblast {
// =================================================================================================
-// Initializes the database
-const std::vector<const Database::DatabaseEntry*> Database::database = {
- &database::XaxpyHalf, &database::XaxpySingle, &database::XaxpyDouble, &database::XaxpyComplexSingle, &database::XaxpyComplexDouble,
- &database::XdotHalf, &database::XdotSingle, &database::XdotDouble, &database::XdotComplexSingle, &database::XdotComplexDouble,
- &database::XgemvHalf, &database::XgemvSingle, &database::XgemvDouble, &database::XgemvComplexSingle, &database::XgemvComplexDouble,
- &database::XgemvFastHalf, &database::XgemvFastSingle, &database::XgemvFastDouble, &database::XgemvFastComplexSingle, &database::XgemvFastComplexDouble,
- &database::XgemvFastRotHalf, &database::XgemvFastRotSingle, &database::XgemvFastRotDouble, &database::XgemvFastRotComplexSingle, &database::XgemvFastRotComplexDouble,
- &database::XgerHalf, &database::XgerSingle, &database::XgerDouble, &database::XgerComplexSingle, &database::XgerComplexDouble,
- &database::XtrsvHalf, &database::XtrsvSingle, &database::XtrsvDouble, &database::XtrsvComplexSingle, &database::XtrsvComplexDouble,
- &database::XgemmHalf, &database::XgemmSingle, &database::XgemmDouble, &database::XgemmComplexSingle, &database::XgemmComplexDouble,
- &database::XgemmDirectHalf, &database::XgemmDirectSingle, &database::XgemmDirectDouble, &database::XgemmDirectComplexSingle, &database::XgemmDirectComplexDouble,
- &database::CopyHalf, &database::CopySingle, &database::CopyDouble, &database::CopyComplexSingle, &database::CopyComplexDouble,
- &database::PadHalf, &database::PadSingle, &database::PadDouble, &database::PadComplexSingle, &database::PadComplexDouble,
- &database::TransposeHalf, &database::TransposeSingle, &database::TransposeDouble, &database::TransposeComplexSingle, &database::TransposeComplexDouble,
- &database::PadtransposeHalf, &database::PadtransposeSingle, &database::PadtransposeDouble, &database::PadtransposeComplexSingle, &database::PadtransposeComplexDouble,
- &database::InvertHalf, &database::InvertSingle, &database::InvertDouble, &database::InvertComplexSingle, &database::InvertComplexDouble,
- &database::KernelSelectionHalf, &database::KernelSelectionSingle, &database::KernelSelectionDouble, &database::KernelSelectionComplexSingle, &database::KernelSelectionComplexDouble
+// Initializes the databases. The main database holds its entries by value (copies of the objects
+// in namespace 'database'), listing for every kernel one entry per precision: half, single,
+// double, complex-single and complex-double.
+const std::vector<Database::DatabaseEntry> Database::database = std::vector<Database::DatabaseEntry>{
+ database::XaxpyHalf, database::XaxpySingle, database::XaxpyDouble, database::XaxpyComplexSingle, database::XaxpyComplexDouble,
+ database::XdotHalf, database::XdotSingle, database::XdotDouble, database::XdotComplexSingle, database::XdotComplexDouble,
+ database::XgemvHalf, database::XgemvSingle, database::XgemvDouble, database::XgemvComplexSingle, database::XgemvComplexDouble,
+ database::XgemvFastHalf, database::XgemvFastSingle, database::XgemvFastDouble, database::XgemvFastComplexSingle, database::XgemvFastComplexDouble,
+ database::XgemvFastRotHalf, database::XgemvFastRotSingle, database::XgemvFastRotDouble, database::XgemvFastRotComplexSingle, database::XgemvFastRotComplexDouble,
+ database::XgerHalf, database::XgerSingle, database::XgerDouble, database::XgerComplexSingle, database::XgerComplexDouble,
+ database::XtrsvHalf, database::XtrsvSingle, database::XtrsvDouble, database::XtrsvComplexSingle, database::XtrsvComplexDouble,
+ database::XgemmHalf, database::XgemmSingle, database::XgemmDouble, database::XgemmComplexSingle, database::XgemmComplexDouble,
+ database::XgemmDirectHalf, database::XgemmDirectSingle, database::XgemmDirectDouble, database::XgemmDirectComplexSingle, database::XgemmDirectComplexDouble,
+ database::CopyHalf, database::CopySingle, database::CopyDouble, database::CopyComplexSingle, database::CopyComplexDouble,
+ database::PadHalf, database::PadSingle, database::PadDouble, database::PadComplexSingle, database::PadComplexDouble,
+ database::TransposeHalf, database::TransposeSingle, database::TransposeDouble, database::TransposeComplexSingle, database::TransposeComplexDouble,
+ database::PadtransposeHalf, database::PadtransposeSingle, database::PadtransposeDouble, database::PadtransposeComplexSingle, database::PadtransposeComplexDouble,
+ database::InvertHalf, database::InvertSingle, database::InvertDouble, database::InvertComplexSingle, database::InvertComplexDouble,
+ database::KernelSelectionHalf, database::KernelSelectionSingle, database::KernelSelectionDouble, database::KernelSelectionComplexSingle, database::KernelSelectionComplexDouble
+};
+// Fallback entries for Apple's OpenCL CPU implementation, stored by value. The entries are the
+// Precision::kAny objects defined in database/apple_cpu_fallback.hpp, one per kernel.
+const std::vector<Database::DatabaseEntry> Database::apple_cpu_fallback = std::vector<Database::DatabaseEntry>{
+ database::XaxpyApple, database::XdotApple,
+ database::XgemvApple, database::XgemvFastApple, database::XgemvFastRotApple, database::XgerApple, database::XtrsvApple,
+ database::XgemmApple, database::XgemmDirectApple,
+ database::CopyApple, database::PadApple, database::TransposeApple, database::PadtransposeApple,
+ database::InvertApple
};
-// The OpenCL device vendors
+// The default values
const std::string Database::kDeviceVendorAll = "default";
// Alternative names for some OpenCL vendors
@@ -68,7 +78,7 @@ const std::unordered_map<std::string, std::string> Database::kVendorNames{
// Constructor, computing device properties and populating the parameter-vector from the database.
// This takes an optional overlay database in case of custom tuning or custom kernels.
Database::Database(const Device &device, const std::string &kernel_name,
- const Precision precision, const std::vector<const DatabaseEntry*> &overlay):
+ const Precision precision, const std::vector<DatabaseEntry> &overlay):
parameters_(std::make_shared<Parameters>()) {
// Finds information of the current device
@@ -83,9 +93,23 @@ Database::Database(const Device &device, const std::string &kernel_name,
}
}
+ // Sets the databases to search through
+ auto databases = std::list<std::vector<DatabaseEntry>>{overlay, database};
+
+ // Special case: modifies the database if the device is a CPU with Apple OpenCL
+ #if defined(__APPLE__) || defined(__MACOSX)
+ if (device.Type() == "CPU") {
+ auto extensions = device.Capabilities();
+ const auto is_apple = (extensions.find("cl_APPLE_SetMemObjectDestructor") == std::string::npos) ? false : true;
+ if (is_apple) {
+ databases.push_front(apple_cpu_fallback);
+ }
+ }
+ #endif
+
// Searches potentially multiple databases
auto search_result = ParametersPtr{};
- for (auto &db: { overlay, database}) {
+ for (auto &db: databases) {
search_result = Search(kernel_name, device_type, device_vendor, device_name, precision, db);
if (search_result) {
parameters_->insert(search_result->begin(), search_result->end());
@@ -124,15 +148,16 @@ Database::ParametersPtr Database::Search(const std::string &this_kernel,
const std::string &this_vendor,
const std::string &this_device,
const Precision this_precision,
- const std::vector<const DatabaseEntry*> &this_database) const {
+ const std::vector<DatabaseEntry> &this_database) const {
// Selects the right kernel
for (auto &db: this_database) {
- if (db->kernel == this_kernel && db->precision == this_precision) {
+ if ((db.kernel == this_kernel) &&
+ (db.precision == this_precision || db.precision == Precision::kAny)) {
// Searches for the right vendor and device type, or selects the default if unavailable. This
// assumes that the default vendor / device type is last in the database.
- for (auto &vendor: db->vendors) {
+ for (auto &vendor: db.vendors) {
if ((vendor.name == this_vendor || vendor.name == kDeviceVendorAll) &&
(vendor.type == this_type || vendor.type == database::kDeviceTypeAll)) {
diff --git a/src/database/database.hpp b/src/database/database.hpp
index b34e0d8a..82fbc252 100644
--- a/src/database/database.hpp
+++ b/src/database/database.hpp
@@ -70,13 +70,16 @@ class Database {
static const std::unordered_map<std::string, std::string> kVendorNames;
// The database consists of separate database entries, stored together in a vector
- static const std::vector<const DatabaseEntry*> database;
+ static const std::vector<DatabaseEntry> database;
+
+ // Database for a special case: Apple CPUs support only a limited number of threads
+ static const std::vector<DatabaseEntry> apple_cpu_fallback;
Database() = default;
// The constructor with a user-provided database overlay (potentially an empty vector)
explicit Database(const Device &device, const std::string &kernel_name,
- const Precision precision, const std::vector<const DatabaseEntry*> &overlay);
+ const Precision precision, const std::vector<DatabaseEntry> &overlay);
// Accessor of values by key
size_t operator[](const std::string &key) const { return parameters_->find(key)->second; }
@@ -93,7 +96,7 @@ class Database {
ParametersPtr Search(const std::string &this_kernel, const std::string &this_type,
const std::string &this_vendor, const std::string &this_device,
const Precision this_precision,
- const std::vector<const DatabaseEntry*> &db) const;
+ const std::vector<DatabaseEntry> &db) const;
// Found parameters suitable for this device/kernel
std::shared_ptr<Parameters> parameters_;
diff --git a/src/kernels/common.opencl b/src/kernels/common.opencl
index 32e3fbb9..db4c8ec4 100644
--- a/src/kernels/common.opencl
+++ b/src/kernels/common.opencl
@@ -69,7 +69,7 @@ R"(
// Complex single-precision
#elif PRECISION == 3232
- typedef struct cfloat {float x; float y;} real;
+ typedef float2 real;
typedef struct cfloat2 {real x; real y;} real2;
typedef struct cfloat4 {real x; real y; real z; real w;} real4;
typedef struct cfloat8 {real s0; real s1; real s2; real s3;
@@ -84,7 +84,7 @@ R"(
// Complex double-precision
#elif PRECISION == 6464
- typedef struct cdouble {double x; double y;} real;
+ typedef double2 real;
typedef struct cdouble2 {real x; real y;} real2;
typedef struct cdouble4 {real x; real y; real z; real w;} real4;
typedef struct cdouble8 {real s0; real s1; real s2; real s3;
diff --git a/src/routine.cpp b/src/routine.cpp
index b5823bc9..cb39c7ee 100644
--- a/src/routine.cpp
+++ b/src/routine.cpp
@@ -51,7 +51,7 @@ const std::unordered_map<std::string, const std::vector<std::string>> Routine::r
// The constructor does all heavy work, errors are returned as exceptions
Routine::Routine(Queue &queue, EventPointer event, const std::string &name,
const std::vector<std::string> &kernel_names, const Precision precision,
- const std::vector<const Database::DatabaseEntry*> &userDatabase,
+ const std::vector<Database::DatabaseEntry> &userDatabase,
std::initializer_list<const char *> source):
precision_(precision),
routine_name_(name),
@@ -67,7 +67,7 @@ Routine::Routine(Queue &queue, EventPointer event, const std::string &name,
InitProgram(source);
}
-void Routine::InitDatabase(const std::vector<const Database::DatabaseEntry*> &userDatabase) {
+void Routine::InitDatabase(const std::vector<Database::DatabaseEntry> &userDatabase) {
for (const auto &kernel_name : kernel_names_) {
// Queries the cache to see whether or not the kernel parameter database is already there
diff --git a/src/routine.hpp b/src/routine.hpp
index eb11b566..903ccdb1 100644
--- a/src/routine.hpp
+++ b/src/routine.hpp
@@ -40,7 +40,7 @@ class Routine {
// and routine list, otherwise the caching logic will break.
explicit Routine(Queue &queue, EventPointer event, const std::string &name,
const std::vector<std::string> &routines, const Precision precision,
- const std::vector<const Database::DatabaseEntry*> &userDatabase,
+ const std::vector<Database::DatabaseEntry> &userDatabase,
std::initializer_list<const char *> source);
// List of kernel-routine look-ups
@@ -59,7 +59,7 @@ class Routine {
void InitProgram(std::initializer_list<const char *> source);
// Initializes db_, fetching cached database or building one
- void InitDatabase(const std::vector<const Database::DatabaseEntry*> &userDatabase);
+ void InitDatabase(const std::vector<Database::DatabaseEntry> &userDatabase);
protected:
diff --git a/src/routines/level1/xaxpy.cpp b/src/routines/level1/xaxpy.cpp
index 39f61ef4..310562a0 100644
--- a/src/routines/level1/xaxpy.cpp
+++ b/src/routines/level1/xaxpy.cpp
@@ -44,12 +44,12 @@ void Xaxpy<T>::DoAxpy(const size_t n, const T alpha,
TestVectorY(n, y_buffer, y_offset, y_inc);
// Determines whether or not the fast-version can be used
- bool use_fast_kernel = (x_offset == 0) && (x_inc == 1) &&
- (y_offset == 0) && (y_inc == 1) &&
- IsMultiple(n, db_["WGS"]*db_["WPT"]*db_["VW"]);
+ const auto use_fast_kernel = (x_offset == 0) && (x_inc == 1) &&
+ (y_offset == 0) && (y_inc == 1) &&
+ IsMultiple(n, db_["WGS"]*db_["WPT"]*db_["VW"]);
// If possible, run the fast-version of the kernel
- auto kernel_name = (use_fast_kernel) ? "XaxpyFast" : "Xaxpy";
+ const auto kernel_name = (use_fast_kernel) ? "XaxpyFast" : "Xaxpy";
// Retrieves the Xaxpy kernel from the compiled binary
auto kernel = Kernel(program_, kernel_name);
@@ -79,7 +79,7 @@ void Xaxpy<T>::DoAxpy(const size_t n, const T alpha,
RunKernel(kernel, queue_, device_, global, local, event_);
}
else {
- auto n_ceiled = Ceil(n, db_["WGS"]*db_["WPT"]);
+ const auto n_ceiled = Ceil(n, db_["WGS"]*db_["WPT"]);
auto global = std::vector<size_t>{n_ceiled/db_["WPT"]};
auto local = std::vector<size_t>{db_["WGS"]};
RunKernel(kernel, queue_, device_, global, local, event_);
diff --git a/src/routines/level2/xgemv.cpp b/src/routines/level2/xgemv.cpp
index 3b5b5e8b..b7e8081b 100644
--- a/src/routines/level2/xgemv.cpp
+++ b/src/routines/level2/xgemv.cpp
@@ -70,14 +70,14 @@ void Xgemv<T>::MatVec(const Layout layout, const Transpose a_transpose,
if (m == 0 || n == 0) { throw BLASError(StatusCode::kInvalidDimension); }
// Computes whether or not the matrix has an alternative layout (row or column-major).
- auto a_altlayout = (layout == Layout::kRowMajor);
+ const auto a_altlayout = (layout == Layout::kRowMajor);
auto a_one = (a_altlayout) ? n : m;
- auto a_two = (a_altlayout) ? m : n;
+ const auto a_two = (a_altlayout) ? m : n;
// Swap m and n if the matrix is transposed
- auto a_transposed = (a_transpose != Transpose::kNo);
- auto m_real = (a_transposed) ? n : m;
- auto n_real = (a_transposed) ? m : n;
+ const auto a_transposed = (a_transpose != Transpose::kNo);
+ const auto m_real = (a_transposed) ? n : m;
+ const auto n_real = (a_transposed) ? m : n;
// Special adjustments for banded matrices
if (kl != 0 || ku != 0) {
@@ -85,10 +85,10 @@ void Xgemv<T>::MatVec(const Layout layout, const Transpose a_transpose,
}
// Determines whether the kernel needs to perform rotated access ('^' is the XOR operator)
- auto a_rotated = a_transposed ^ a_altlayout;
+ const auto a_rotated = a_transposed ^ a_altlayout;
// In case of complex data-types, the transpose can also become a conjugate transpose
- auto a_conjugate = (a_transpose == Transpose::kConjugate);
+ const auto a_conjugate = (a_transpose == Transpose::kConjugate);
// Tests the matrix and the vectors for validity
if (packed) { TestMatrixAP(n, a_buffer, a_offset); }
@@ -107,8 +107,8 @@ void Xgemv<T>::MatVec(const Layout layout, const Transpose a_transpose,
IsMultiple(a_ld, db_["VW3"]);
// If possible, run the fast-version (rotated or non-rotated) of the kernel
- auto kernel_name = "Xgemv";
- auto m_ceiled = Ceil(m_real, db_["WGS1"]*db_["WPT1"]);
+ auto kernel_name = std::string{"Xgemv"};
+ const auto m_ceiled = Ceil(m_real, db_["WGS1"]*db_["WPT1"]);
auto global_size = m_ceiled / db_["WPT1"];
auto local_size = db_["WGS1"];
if (fast_kernel) {
diff --git a/src/utilities/utilities.cpp b/src/utilities/utilities.cpp
index 0f2661ad..95b70cd5 100644
--- a/src/utilities/utilities.cpp
+++ b/src/utilities/utilities.cpp
@@ -176,6 +176,7 @@ std::string ToString(Precision value) {
case Precision::kDouble: return ToString(static_cast<int>(value))+" (double)";
case Precision::kComplexSingle: return ToString(static_cast<int>(value))+" (complex-single)";
case Precision::kComplexDouble: return ToString(static_cast<int>(value))+" (complex-double)";
+ case Precision::kAny: return ToString(static_cast<int>(value))+" (any)";
}
}
template <>
@@ -467,6 +468,7 @@ size_t GetBytes(const Precision precision) {
case Precision::kDouble: return 8;
case Precision::kComplexSingle: return 8;
case Precision::kComplexDouble: return 16;
+ case Precision::kAny: return -1;
}
}