summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--CHANGELOG2
-rw-r--r--src/tuning/tuning.cpp8
-rw-r--r--src/tuning/tuning_api.cpp8
-rw-r--r--src/utilities/utilities.hpp3
-rw-r--r--test/correctness/testblas.cpp41
-rw-r--r--test/routines/level2/xhpr.hpp2
-rw-r--r--test/routines/level2/xhpr2.hpp2
-rw-r--r--test/routines/level2/xspr.hpp2
-rw-r--r--test/routines/level2/xspr2.hpp2
9 files changed, 49 insertions, 21 deletions
diff --git a/CHANGELOG b/CHANGELOG
index 4ccd09ec..1d2f3d9d 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -7,6 +7,8 @@ Development (next version)
- Added support for Intel specific subgroup shuffling extensions for faster GEMM on Intel GPUs
- Re-added a local memory size constraint to the tuners
- Updated and reorganised the CLBlast documentation
+- Added a 'canary' region to check for overflows in the tuner and tests (inspired by clARMOR)
+- Fixed an access violation when compiled with Visual Studio upon releasing the OpenCL program
- Fixed incorrect releasing of the OpenCL program resulting in segfaults / access violations
- Various minor fixes and enhancements
- Added tuned parameters for various devices (see doc/tuning.md)
diff --git a/src/tuning/tuning.cpp b/src/tuning/tuning.cpp
index dd4a83e6..216f4b31 100644
--- a/src/tuning/tuning.cpp
+++ b/src/tuning/tuning.cpp
@@ -150,11 +150,11 @@ void Tuner(int argc, char* argv[], const int V,
const auto device_architecture = GetDeviceArchitecture(device);
const auto device_name = GetDeviceName(device);
- // Creates input buffers with random data
+ // Creates input buffers with random data. Adds a 'canary' region to detect buffer overflows.
const auto buffer_sizes = std::vector<size_t>{
- settings.size_x, settings.size_y,
- settings.size_a, settings.size_b, settings.size_c,
- settings.size_temp
+ settings.size_x + kCanarySize, settings.size_y + kCanarySize,
+ settings.size_a + kCanarySize, settings.size_b + kCanarySize, settings.size_c + kCanarySize,
+ settings.size_temp + kCanarySize
};
std::mt19937 mt(kSeed);
std::uniform_real_distribution<double> dist(kTestDataLowerLimit, kTestDataUpperLimit);
diff --git a/src/tuning/tuning_api.cpp b/src/tuning/tuning_api.cpp
index f1da40c1..2eec2e2e 100644
--- a/src/tuning/tuning_api.cpp
+++ b/src/tuning/tuning_api.cpp
@@ -241,11 +241,11 @@ StatusCode TunerAPI(Queue &queue, const Arguments<T> &args, const int V,
const auto device_architecture = GetDeviceArchitecture(device);
const auto device_name = GetDeviceName(device);
- // Creates input buffers with random data
+ // Creates input buffers with random data. Adds a 'canary' region to detect buffer overflows.
const auto buffer_sizes = std::vector<size_t>{
- settings.size_x, settings.size_y,
- settings.size_a, settings.size_b, settings.size_c,
- settings.size_temp
+ settings.size_x + kCanarySize, settings.size_y + kCanarySize,
+ settings.size_a + kCanarySize, settings.size_b + kCanarySize, settings.size_c + kCanarySize,
+ settings.size_temp + kCanarySize
};
const auto seed = static_cast<unsigned long>(time(nullptr));
std::mt19937 mt(seed);
diff --git a/src/utilities/utilities.hpp b/src/utilities/utilities.hpp
index 0edf77fe..84591f70 100644
--- a/src/utilities/utilities.hpp
+++ b/src/utilities/utilities.hpp
@@ -52,6 +52,9 @@ const std::string kKhronosIntelSubgroups = "cl_intel_subgroups";
// Catched an unknown error
constexpr auto kUnknownError = -999;
+// Canary size to add to buffers to check for buffer overflows
+constexpr auto kCanarySize = 127;
+
// =================================================================================================
// The routine-specific arguments in string form
diff --git a/test/correctness/testblas.cpp b/test/correctness/testblas.cpp
index aa4b4785..3c92565e 100644
--- a/test/correctness/testblas.cpp
+++ b/test/correctness/testblas.cpp
@@ -66,14 +66,14 @@ TestBlas<T,U>::TestBlas(const std::vector<std::string> &arguments, const bool si
const auto max_offset = *std::max_element(kOffsets.begin(), kOffsets.end());
const auto max_batch_count = *std::max_element(kBatchCounts.begin(), kBatchCounts.end());
- // Creates test input data
- x_source_.resize(max_batch_count * std::max(max_vec, max_matvec)*max_inc + max_offset);
- y_source_.resize(max_batch_count * std::max(max_vec, max_matvec)*max_inc + max_offset);
- a_source_.resize(max_batch_count * std::max(max_mat, max_matvec)*std::max(max_ld, max_matvec) + max_offset);
- b_source_.resize(max_batch_count * std::max(max_mat, max_matvec)*std::max(max_ld, max_matvec) + max_offset);
- c_source_.resize(max_batch_count * std::max(max_mat, max_matvec)*std::max(max_ld, max_matvec) + max_offset);
- ap_source_.resize(max_batch_count * std::max(max_mat, max_matvec)*std::max(max_mat, max_matvec) + max_offset);
- scalar_source_.resize(max_batch_count * std::max(max_mat, max_matvec) + max_offset);
+ // Creates test input data. Adds a 'canary' region to detect buffer overflows
+ x_source_.resize(max_batch_count * std::max(max_vec, max_matvec)*max_inc + max_offset + kCanarySize);
+ y_source_.resize(max_batch_count * std::max(max_vec, max_matvec)*max_inc + max_offset + kCanarySize);
+ a_source_.resize(max_batch_count * std::max(max_mat, max_matvec)*std::max(max_ld, max_matvec) + max_offset + kCanarySize);
+ b_source_.resize(max_batch_count * std::max(max_mat, max_matvec)*std::max(max_ld, max_matvec) + max_offset + kCanarySize);
+ c_source_.resize(max_batch_count * std::max(max_mat, max_matvec)*std::max(max_ld, max_matvec) + max_offset + kCanarySize);
+ ap_source_.resize(max_batch_count * std::max(max_mat, max_matvec)*std::max(max_mat, max_matvec) + max_offset + kCanarySize);
+ scalar_source_.resize(max_batch_count * std::max(max_mat, max_matvec) + max_offset + kCanarySize);
std::mt19937 mt(kSeed);
std::uniform_real_distribution<double> dist(kTestDataLowerLimit, kTestDataUpperLimit);
PopulateVector(x_source_, mt, dist);
@@ -94,7 +94,16 @@ void TestBlas<T,U>::TestRegular(std::vector<Arguments<U>> &test_vector, const st
TestStart("regular behaviour", name);
// Iterates over all the to-be-tested combinations of arguments
- for (const auto &args: test_vector) {
+ for (auto &args: test_vector) {
+
+ // Adds a 'canary' region to detect buffer overflows
+ args.x_size += kCanarySize;
+ args.y_size += kCanarySize;
+ args.a_size += kCanarySize;
+ args.b_size += kCanarySize;
+ args.c_size += kCanarySize;
+ args.ap_size += kCanarySize;
+ args.scalar_size += kCanarySize;
// Prints the current test configuration
if (verbose_) {
@@ -209,6 +218,20 @@ void TestBlas<T,U>::TestRegular(std::vector<Arguments<U>> &test_vector, const st
}
}
}
+ // Checks for differences in the 'canary' region to detect buffer overflows
+ for (auto canary_id=size_t{0}; canary_id<kCanarySize; ++canary_id) {
+ auto index = get_index_(args, get_id1_(args) - 1, get_id2_(args) - 1) + canary_id;
+ if (!TestSimilarity(result1[index], result2[index])) {
+ errors++;
+ if (verbose_) {
+ if (get_id2_(args) == 1) { std::cout << std::endl << " Buffer overflow index " << index << ": "; }
+ else { std::cout << std::endl << " Buffer overflow " << index << ": "; }
+ std::cout << " " << ToString(result1[index]) << " (reference) versus ";
+ std::cout << " " << ToString(result2[index]) << " (CLBlast)";
+ }
+ }
+ }
+
// Report the results
if (verbose_ && errors > 0) {
diff --git a/test/routines/level2/xhpr.hpp b/test/routines/level2/xhpr.hpp
index 1e9bbe29..3a79efa0 100644
--- a/test/routines/level2/xhpr.hpp
+++ b/test/routines/level2/xhpr.hpp
@@ -139,7 +139,7 @@ class TestXhpr {
}
// Describes how to compute the indices of the result buffer
- static size_t ResultID1(const Arguments<U> &args) { return args.ap_size - args.ap_offset; }
+ static size_t ResultID1(const Arguments<U> &args) { return GetSizeAP(args) - args.ap_offset; }
static size_t ResultID2(const Arguments<U> &) { return 1; } // N/A for this routine
static size_t GetResultIndex(const Arguments<U> &args, const size_t id1, const size_t) {
return id1 + args.ap_offset;
diff --git a/test/routines/level2/xhpr2.hpp b/test/routines/level2/xhpr2.hpp
index 433a5a93..ed37e175 100644
--- a/test/routines/level2/xhpr2.hpp
+++ b/test/routines/level2/xhpr2.hpp
@@ -148,7 +148,7 @@ class TestXhpr2 {
}
// Describes how to compute the indices of the result buffer
- static size_t ResultID1(const Arguments<T> &args) { return args.ap_size - args.ap_offset; }
+ static size_t ResultID1(const Arguments<T> &args) { return GetSizeAP(args) - args.ap_offset; }
static size_t ResultID2(const Arguments<T> &) { return 1; } // N/A for this routine
static size_t GetResultIndex(const Arguments<T> &args, const size_t id1, const size_t) {
return id1 + args.ap_offset;
diff --git a/test/routines/level2/xspr.hpp b/test/routines/level2/xspr.hpp
index af17b8cd..3f0dfe62 100644
--- a/test/routines/level2/xspr.hpp
+++ b/test/routines/level2/xspr.hpp
@@ -139,7 +139,7 @@ class TestXspr {
}
// Describes how to compute the indices of the result buffer
- static size_t ResultID1(const Arguments<T> &args) { return args.ap_size - args.ap_offset; }
+ static size_t ResultID1(const Arguments<T> &args) { return GetSizeAP(args) - args.ap_offset; }
static size_t ResultID2(const Arguments<T> &) { return 1; } // N/A for this routine
static size_t GetResultIndex(const Arguments<T> &args, const size_t id1, const size_t) {
return id1 + args.ap_offset;
diff --git a/test/routines/level2/xspr2.hpp b/test/routines/level2/xspr2.hpp
index b615aca7..b91eab24 100644
--- a/test/routines/level2/xspr2.hpp
+++ b/test/routines/level2/xspr2.hpp
@@ -148,7 +148,7 @@ class TestXspr2 {
}
// Describes how to compute the indices of the result buffer
- static size_t ResultID1(const Arguments<T> &args) { return args.ap_size - args.ap_offset; }
+ static size_t ResultID1(const Arguments<T> &args) { return GetSizeAP(args) - args.ap_offset; }
static size_t ResultID2(const Arguments<T> &) { return 1; } // N/A for this routine
static size_t GetResultIndex(const Arguments<T> &args, const size_t id1, const size_t) {
return id1 + args.ap_offset;