summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--CHANGELOG3
-rw-r--r--include/clblast.h5
-rw-r--r--include/clblast_cuda.h5
-rwxr-xr-xscripts/generator/generator.py2
-rw-r--r--src/api_common.cpp28
-rw-r--r--src/database/database.hpp1
6 files changed, 42 insertions, 2 deletions
diff --git a/CHANGELOG b/CHANGELOG
index e4205894..83ba7b07 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -10,6 +10,7 @@ Development (next version)
- Improved compilation time by splitting the tuning database into multiple compilation units
- Various minor fixes and enhancements
- Added tuned parameters for various devices (see README)
+- Added the RetrieveParameters function to the API to be able to inspect the tuning parameters
- Added a strided-batched (not part of the BLAS standard) routine, faster but less generic compared
to the existing xGEMMBATCHED routines:
* SGEMMSTRIDEDBATCHED/DGEMMSTRIDEDBATCHED/CGEMMSTRIDEDBATCHED/ZGEMMSTRIDEDBATCHED/HGEMMSTRIDEDBATCHED
@@ -70,7 +71,7 @@ Version 0.11.0
- Replaced the R graph scripts with Python/Matplotlib scripts
- Various minor fixes and enhancements
- Added tuned parameters for various devices (see README)
-- Added the OverrideParameters function to the API to be able to supply custom tuning parmeters
+- Added the OverrideParameters function to the API to be able to supply custom tuning parameters
- Added triangular solver (level-2 & level-3) routines:
* STRSV/DTRSV/CTRSV/ZTRSV (experimental, un-optimized)
* STRSM/DTRSM/CTRSM/ZTRSM (experimental, un-optimized)
diff --git a/include/clblast.h b/include/clblast.h
index 8e3e64da..c4ff5290 100644
--- a/include/clblast.h
+++ b/include/clblast.h
@@ -682,6 +682,11 @@ StatusCode PUBLIC_API FillCache(const cl_device_id device);
// =================================================================================================
+// Retrieves current tuning parameters for a specific device-precision-kernel combination
+StatusCode PUBLIC_API RetrieveParameters(const cl_device_id device, const std::string &kernel_name,
+ const Precision precision,
+ std::unordered_map<std::string,size_t> &parameters);
+
// Overrides tuning parameters for a specific device-precision-kernel combination. The next time
// the target routine is called it will re-compile and use the new parameters from then on.
StatusCode PUBLIC_API OverrideParameters(const cl_device_id device, const std::string &kernel_name,
diff --git a/include/clblast_cuda.h b/include/clblast_cuda.h
index b0cb9aa8..ed348efe 100644
--- a/include/clblast_cuda.h
+++ b/include/clblast_cuda.h
@@ -654,6 +654,11 @@ StatusCode PUBLIC_API FillCache(const CUdevice device);
// =================================================================================================
+// Retrieves current tuning parameters for a specific device-precision-kernel combination
+StatusCode PUBLIC_API RetrieveParameters(const CUdevice device, const std::string &kernel_name,
+ const Precision precision,
+ std::unordered_map<std::string,size_t> &parameters);
+
// Overrides tuning parameters for a specific device-precision-kernel combination. The next time
// the target routine is called it will re-compile and use the new parameters from then on.
StatusCode PUBLIC_API OverrideParameters(const CUdevice device, const std::string &kernel_name,
diff --git a/scripts/generator/generator.py b/scripts/generator/generator.py
index 528e61dd..b77b861e 100755
--- a/scripts/generator/generator.py
+++ b/scripts/generator/generator.py
@@ -47,7 +47,7 @@ FILES = [
"/src/clblast_cuda.cpp",
]
HEADER_LINES = [123, 21, 126, 24, 29, 41, 29, 65, 32, 95, 21]
-FOOTER_LINES = [36, 56, 27, 38, 6, 6, 6, 9, 2, 36, 55]
+FOOTER_LINES = [41, 56, 27, 38, 6, 6, 6, 9, 2, 41, 55]
HEADER_LINES_DOC = 0
FOOTER_LINES_DOC = 63
diff --git a/src/api_common.cpp b/src/api_common.cpp
index 0d387cd9..4e08f1ef 100644
--- a/src/api_common.cpp
+++ b/src/api_common.cpp
@@ -112,6 +112,34 @@ StatusCode FillCache(const RawDeviceID device) {
// =================================================================================================
+// Writes the tuning parameters of this device-precision-kernel combination into 'parameters'
+StatusCode RetrieveParameters(const RawDeviceID device, const std::string &kernel_name,
+ const Precision precision,
+ std::unordered_map<std::string,size_t> &parameters) {
+ try {
+
+ // Wraps the raw device and queries its platform and name (NOTE(review): device_name unused below)
+ const auto device_cpp = Device(device);
+ const auto platform_id = device_cpp.PlatformID();
+ const auto device_name = GetDeviceName(device_cpp);
+
+ // Fetches the database from DatabaseCache; if 'in_cache' comes back false, builds a new one
+ auto in_cache = false;
+ auto database = DatabaseCache::Instance().Get(DatabaseKeyRef{platform_id, device, precision, kernel_name}, &in_cache);
+ if (!in_cache) {
+ log_debug("Searching database for kernel '" + kernel_name + "'");
+ database = Database(device_cpp, kernel_name, precision, {});
+ }
+
+ // Copies each name-value pair into the caller's map, overwriting entries with the same key
+ for (const auto &parameter: database.GetParameters()) {
+ parameters[parameter.first] = parameter.second;
+ }
+
+ } catch (...) { return DispatchException(); } // any exception becomes the returned status
+ return StatusCode::kSuccess;
+}
+
// Overrides the tuning parameters for this device-precision-kernel combination
StatusCode OverrideParameters(const RawDeviceID device, const std::string &kernel_name,
const Precision precision,
diff --git a/src/database/database.hpp b/src/database/database.hpp
index 8e53e013..1db2c286 100644
--- a/src/database/database.hpp
+++ b/src/database/database.hpp
@@ -56,6 +56,7 @@ class Database {
// Retrieves the values or names of all the parameters
std::string GetValuesString() const;
std::vector<std::string> GetParameterNames() const;
+ const database::Parameters& GetParameters() const { return *parameters_; }
private:
// Search method functions, returning a set of parameters (possibly empty)