summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--CHANGELOG1
-rw-r--r--test/diagnostics.cpp3
2 files changed, 4 insertions, 0 deletions
diff --git a/CHANGELOG b/CHANGELOG
index 32c0be3a..f93e736d 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -4,6 +4,7 @@ Development (next version)
- The tuning database now has a dictionary to translate vendor/device names to a common set
- The tuners can now distinguish between different AMD GPU board names of the same architecture
- The tuners can now use particle-swarm optimisation to search more efficiently (thanks to 'mcian')
+- Improved performance for small problems on NVIDIA hardware by caching the device name
- Further improved compilation time of database.cpp
- Added a small diagnostics helper executable
- Various minor fixes and enhancements
diff --git a/test/diagnostics.cpp b/test/diagnostics.cpp
index 6872ed6f..99b936f8 100644
--- a/test/diagnostics.cpp
+++ b/test/diagnostics.cpp
@@ -83,6 +83,9 @@ void OpenCLDiagnostics(int argc, char *argv[]) {
printf("* queue.GetContext() %.4lf ms\n", TimeFunction(kNumRuns, [&](){queue.GetContext();} ));
printf("* queue.GetDevice() %.4lf ms\n", TimeFunction(kNumRuns, [&](){queue.GetDevice();} ));
printf("* device.Name() %.4lf ms\n", TimeFunction(kNumRuns, [&](){device.Name();} ));
+ printf("* device.Vendor() %.4lf ms\n", TimeFunction(kNumRuns, [&](){device.Vendor();} ));
+ printf("* device.Version() %.4lf ms\n", TimeFunction(kNumRuns, [&](){device.Version();} ));
+ printf("* device.Platform() %.4lf ms\n", TimeFunction(kNumRuns, [&](){device.Platform();} ));
printf("* Buffer<float>(context, 1024) %.4lf ms\n", TimeFunction(kNumRuns, [&](){Buffer<float>(context, 1024);} ));
printf("\n");