diff options
-rw-r--r-- | CHANGELOG | 2 | ||||
-rw-r--r-- | src/tuning/tuning.cpp | 8 | ||||
-rw-r--r-- | src/tuning/tuning_api.cpp | 8 | ||||
-rw-r--r-- | src/utilities/utilities.hpp | 3 | ||||
-rw-r--r-- | test/correctness/testblas.cpp | 41 | ||||
-rw-r--r-- | test/routines/level2/xhpr.hpp | 2 | ||||
-rw-r--r-- | test/routines/level2/xhpr2.hpp | 2 | ||||
-rw-r--r-- | test/routines/level2/xspr.hpp | 2 | ||||
-rw-r--r-- | test/routines/level2/xspr2.hpp | 2 |
9 files changed, 49 insertions, 21 deletions
@@ -7,6 +7,8 @@ Development (next version) - Added support for Intel specific subgroup shuffling extensions for faster GEMM on Intel GPUs - Re-added a local memory size constraint to the tuners - Updated and reorganised the CLBlast documentation +- Added a 'canary' region to check for overflows in the tuner and tests (inspired by clARMOR) +- Fixed an access violation when compiled with Visual Studio upon releasing the OpenCL program - Fixed incorrect releasing of the OpenCL program resulting in segfaults / access violations - Various minor fixes and enhancements - Added tuned parameters for various devices (see doc/tuning.md) diff --git a/src/tuning/tuning.cpp b/src/tuning/tuning.cpp index dd4a83e6..216f4b31 100644 --- a/src/tuning/tuning.cpp +++ b/src/tuning/tuning.cpp @@ -150,11 +150,11 @@ void Tuner(int argc, char* argv[], const int V, const auto device_architecture = GetDeviceArchitecture(device); const auto device_name = GetDeviceName(device); - // Creates input buffers with random data + // Creates input buffers with random data. Adds a 'canary' region to detect buffer overflows. 
const auto buffer_sizes = std::vector<size_t>{ - settings.size_x, settings.size_y, - settings.size_a, settings.size_b, settings.size_c, - settings.size_temp + settings.size_x + kCanarySize, settings.size_y + kCanarySize, + settings.size_a + kCanarySize, settings.size_b + kCanarySize, settings.size_c + kCanarySize, + settings.size_temp + kCanarySize }; std::mt19937 mt(kSeed); std::uniform_real_distribution<double> dist(kTestDataLowerLimit, kTestDataUpperLimit); diff --git a/src/tuning/tuning_api.cpp b/src/tuning/tuning_api.cpp index f1da40c1..2eec2e2e 100644 --- a/src/tuning/tuning_api.cpp +++ b/src/tuning/tuning_api.cpp @@ -241,11 +241,11 @@ StatusCode TunerAPI(Queue &queue, const Arguments<T> &args, const int V, const auto device_architecture = GetDeviceArchitecture(device); const auto device_name = GetDeviceName(device); - // Creates input buffers with random data + // Creates input buffers with random data. Adds a 'canary' region to detect buffer overflows. const auto buffer_sizes = std::vector<size_t>{ - settings.size_x, settings.size_y, - settings.size_a, settings.size_b, settings.size_c, - settings.size_temp + settings.size_x + kCanarySize, settings.size_y + kCanarySize, + settings.size_a + kCanarySize, settings.size_b + kCanarySize, settings.size_c + kCanarySize, + settings.size_temp + kCanarySize }; const auto seed = static_cast<unsigned long>(time(nullptr)); std::mt19937 mt(seed); diff --git a/src/utilities/utilities.hpp b/src/utilities/utilities.hpp index 0edf77fe..84591f70 100644 --- a/src/utilities/utilities.hpp +++ b/src/utilities/utilities.hpp @@ -52,6 +52,9 @@ const std::string kKhronosIntelSubgroups = "cl_intel_subgroups"; // Catched an unknown error constexpr auto kUnknownError = -999; +// Canary size to add to buffers to check for buffer overflows +constexpr auto kCanarySize = 127; + // ================================================================================================= // The routine-specific arguments in string form diff --git 
a/test/correctness/testblas.cpp b/test/correctness/testblas.cpp index aa4b4785..3c92565e 100644 --- a/test/correctness/testblas.cpp +++ b/test/correctness/testblas.cpp @@ -66,14 +66,14 @@ TestBlas<T,U>::TestBlas(const std::vector<std::string> &arguments, const bool si const auto max_offset = *std::max_element(kOffsets.begin(), kOffsets.end()); const auto max_batch_count = *std::max_element(kBatchCounts.begin(), kBatchCounts.end()); - // Creates test input data - x_source_.resize(max_batch_count * std::max(max_vec, max_matvec)*max_inc + max_offset); - y_source_.resize(max_batch_count * std::max(max_vec, max_matvec)*max_inc + max_offset); - a_source_.resize(max_batch_count * std::max(max_mat, max_matvec)*std::max(max_ld, max_matvec) + max_offset); - b_source_.resize(max_batch_count * std::max(max_mat, max_matvec)*std::max(max_ld, max_matvec) + max_offset); - c_source_.resize(max_batch_count * std::max(max_mat, max_matvec)*std::max(max_ld, max_matvec) + max_offset); - ap_source_.resize(max_batch_count * std::max(max_mat, max_matvec)*std::max(max_mat, max_matvec) + max_offset); - scalar_source_.resize(max_batch_count * std::max(max_mat, max_matvec) + max_offset); + // Creates test input data. 
Adds a 'canary' region to detect buffer overflows + x_source_.resize(max_batch_count * std::max(max_vec, max_matvec)*max_inc + max_offset + kCanarySize); + y_source_.resize(max_batch_count * std::max(max_vec, max_matvec)*max_inc + max_offset + kCanarySize); + a_source_.resize(max_batch_count * std::max(max_mat, max_matvec)*std::max(max_ld, max_matvec) + max_offset + kCanarySize); + b_source_.resize(max_batch_count * std::max(max_mat, max_matvec)*std::max(max_ld, max_matvec) + max_offset + kCanarySize); + c_source_.resize(max_batch_count * std::max(max_mat, max_matvec)*std::max(max_ld, max_matvec) + max_offset + kCanarySize); + ap_source_.resize(max_batch_count * std::max(max_mat, max_matvec)*std::max(max_mat, max_matvec) + max_offset + kCanarySize); + scalar_source_.resize(max_batch_count * std::max(max_mat, max_matvec) + max_offset + kCanarySize); std::mt19937 mt(kSeed); std::uniform_real_distribution<double> dist(kTestDataLowerLimit, kTestDataUpperLimit); PopulateVector(x_source_, mt, dist); @@ -94,7 +94,16 @@ void TestBlas<T,U>::TestRegular(std::vector<Arguments<U>> &test_vector, const st TestStart("regular behaviour", name); // Iterates over all the to-be-tested combinations of arguments - for (const auto &args: test_vector) { + for (auto &args: test_vector) { + + // Adds a 'canary' region to detect buffer overflows + args.x_size += kCanarySize; + args.y_size += kCanarySize; + args.a_size += kCanarySize; + args.b_size += kCanarySize; + args.c_size += kCanarySize; + args.ap_size += kCanarySize; + args.scalar_size += kCanarySize; // Prints the current test configuration if (verbose_) { @@ -209,6 +218,20 @@ void TestBlas<T,U>::TestRegular(std::vector<Arguments<U>> &test_vector, const st } } } + // Checks for differences in the 'canary' region to detect buffer overflows + for (auto canary_id=size_t{0}; canary_id<kCanarySize; ++canary_id) { + auto index = get_index_(args, get_id1_(args) - 1, get_id2_(args) - 1) + canary_id; + if (!TestSimilarity(result1[index], 
result2[index])) { + errors++; + if (verbose_) { + if (get_id2_(args) == 1) { std::cout << std::endl << " Buffer overflow index " << index << ": "; } + else { std::cout << std::endl << " Buffer overflow " << index << ": "; } + std::cout << " " << ToString(result1[index]) << " (reference) versus "; + std::cout << " " << ToString(result2[index]) << " (CLBlast)"; + } + } + } + // Report the results if (verbose_ && errors > 0) { diff --git a/test/routines/level2/xhpr.hpp b/test/routines/level2/xhpr.hpp index 1e9bbe29..3a79efa0 100644 --- a/test/routines/level2/xhpr.hpp +++ b/test/routines/level2/xhpr.hpp @@ -139,7 +139,7 @@ class TestXhpr { } // Describes how to compute the indices of the result buffer - static size_t ResultID1(const Arguments<U> &args) { return args.ap_size - args.ap_offset; } + static size_t ResultID1(const Arguments<U> &args) { return GetSizeAP(args) - args.ap_offset; } static size_t ResultID2(const Arguments<U> &) { return 1; } // N/A for this routine static size_t GetResultIndex(const Arguments<U> &args, const size_t id1, const size_t) { return id1 + args.ap_offset; diff --git a/test/routines/level2/xhpr2.hpp b/test/routines/level2/xhpr2.hpp index 433a5a93..ed37e175 100644 --- a/test/routines/level2/xhpr2.hpp +++ b/test/routines/level2/xhpr2.hpp @@ -148,7 +148,7 @@ class TestXhpr2 { } // Describes how to compute the indices of the result buffer - static size_t ResultID1(const Arguments<T> &args) { return args.ap_size - args.ap_offset; } + static size_t ResultID1(const Arguments<T> &args) { return GetSizeAP(args) - args.ap_offset; } static size_t ResultID2(const Arguments<T> &) { return 1; } // N/A for this routine static size_t GetResultIndex(const Arguments<T> &args, const size_t id1, const size_t) { return id1 + args.ap_offset; diff --git a/test/routines/level2/xspr.hpp b/test/routines/level2/xspr.hpp index af17b8cd..3f0dfe62 100644 --- a/test/routines/level2/xspr.hpp +++ b/test/routines/level2/xspr.hpp @@ -139,7 +139,7 @@ class TestXspr { } // 
Describes how to compute the indices of the result buffer - static size_t ResultID1(const Arguments<T> &args) { return args.ap_size - args.ap_offset; } + static size_t ResultID1(const Arguments<T> &args) { return GetSizeAP(args) - args.ap_offset; } static size_t ResultID2(const Arguments<T> &) { return 1; } // N/A for this routine static size_t GetResultIndex(const Arguments<T> &args, const size_t id1, const size_t) { return id1 + args.ap_offset; diff --git a/test/routines/level2/xspr2.hpp b/test/routines/level2/xspr2.hpp index b615aca7..b91eab24 100644 --- a/test/routines/level2/xspr2.hpp +++ b/test/routines/level2/xspr2.hpp @@ -148,7 +148,7 @@ class TestXspr2 { } // Describes how to compute the indices of the result buffer - static size_t ResultID1(const Arguments<T> &args) { return args.ap_size - args.ap_offset; } + static size_t ResultID1(const Arguments<T> &args) { return GetSizeAP(args) - args.ap_offset; } static size_t ResultID2(const Arguments<T> &) { return 1; } // N/A for this routine static size_t GetResultIndex(const Arguments<T> &args, const size_t id1, const size_t) { return id1 + args.ap_offset; |