diff options
Diffstat (limited to 'test/correctness')
-rw-r--r-- | test/correctness/misc/override_parameters.cpp | 2 | ||||
-rw-r--r-- | test/correctness/testblas.cpp | 213 | ||||
-rw-r--r-- | test/correctness/testblas.hpp | 11 | ||||
-rw-r--r-- | test/correctness/tester.cpp | 1 |
4 files changed, 127 insertions, 100 deletions
diff --git a/test/correctness/misc/override_parameters.cpp b/test/correctness/misc/override_parameters.cpp index e6eebef7..c6c70d9f 100644 --- a/test/correctness/misc/override_parameters.cpp +++ b/test/correctness/misc/override_parameters.cpp @@ -88,7 +88,7 @@ size_t RunOverrideTests(int argc, char *argv[], const bool silent, const std::st device_b.Write(queue, host_b.size(), host_b); device_c.Write(queue, host_c.size(), host_c); auto dummy = Buffer<T>(context, 1); - auto buffers = Buffers<T>{dummy, dummy, device_a, device_b, device_c, dummy, dummy}; + auto buffers = std::vector<Buffers<T>>{Buffers<T>{dummy, dummy, device_a, device_b, device_c, dummy, dummy}}; // Loops over the valid combinations: run before and run afterwards fprintf(stdout, "* Testing OverrideParameters for '%s'\n", routine_name.c_str()); diff --git a/test/correctness/testblas.cpp b/test/correctness/testblas.cpp index 505b3b36..fcb2eceb 100644 --- a/test/correctness/testblas.cpp +++ b/test/correctness/testblas.cpp @@ -27,6 +27,7 @@ template <typename T, typename U> const std::vector<size_t> TestBlas<T,U>::kIncr template <typename T, typename U> const std::vector<size_t> TestBlas<T,U>::kMatrixDims = { 7, 64 }; template <typename T, typename U> const std::vector<size_t> TestBlas<T,U>::kMatrixVectorDims = { 61, 256 }; template <typename T, typename U> const std::vector<size_t> TestBlas<T,U>::kBandSizes = { 4, 19 }; +template <typename T, typename U> const std::vector<size_t> TestBlas<T,U>::kBatchCounts = { 1, 3 }; // Test settings for the invalid tests template <typename T, typename U> const std::vector<size_t> TestBlas<T,U>::kInvalidIncrements = { 0, 1 }; @@ -81,15 +82,16 @@ TestBlas<T,U>::TestBlas(const std::vector<std::string> &arguments, const bool si const auto max_ld = *std::max_element(kMatrixDims.begin(), kMatrixDims.end()); const auto max_matvec = *std::max_element(kMatrixVectorDims.begin(), kMatrixVectorDims.end()); const auto max_offset = *std::max_element(kOffsets.begin(), kOffsets.end()); + const auto max_batch_count = *std::max_element(kBatchCounts.begin(), kBatchCounts.end()); // Creates test input data - x_source_.resize(std::max(max_vec, max_matvec)*max_inc + max_offset); - y_source_.resize(std::max(max_vec, max_matvec)*max_inc + max_offset); - a_source_.resize(std::max(max_mat, max_matvec)*std::max(max_ld, max_matvec) + max_offset); - b_source_.resize(std::max(max_mat, max_matvec)*std::max(max_ld, max_matvec) + max_offset); - c_source_.resize(std::max(max_mat, max_matvec)*std::max(max_ld, max_matvec) + max_offset); - ap_source_.resize(std::max(max_mat, max_matvec)*std::max(max_mat, max_matvec) + max_offset); - scalar_source_.resize(std::max(max_mat, max_matvec) + max_offset); + x_source_.resize(max_batch_count * std::max(max_vec, max_matvec)*max_inc + max_offset); + y_source_.resize(max_batch_count * std::max(max_vec, max_matvec)*max_inc + max_offset); + a_source_.resize(max_batch_count * std::max(max_mat, max_matvec)*std::max(max_ld, max_matvec) + max_offset); + b_source_.resize(max_batch_count * std::max(max_mat, max_matvec)*std::max(max_ld, max_matvec) + max_offset); + c_source_.resize(max_batch_count * std::max(max_mat, max_matvec)*std::max(max_ld, max_matvec) + max_offset); + ap_source_.resize(max_batch_count * std::max(max_mat, max_matvec)*std::max(max_mat, max_matvec) + max_offset); + scalar_source_.resize(max_batch_count * std::max(max_mat, max_matvec) + max_offset); std::mt19937 mt(kSeed); std::uniform_real_distribution<double> dist(kTestDataLowerLimit, kTestDataUpperLimit); PopulateVector(x_source_, mt, dist); @@ -124,21 +126,24 @@ void TestBlas<T,U>::TestRegular(std::vector<Arguments<U>> &test_vector, const st ap_source_, scalar_source_); // Set-up for the CLBlast run - auto x_vec2 = Buffer<T>(context_, args.x_size); - auto y_vec2 = Buffer<T>(context_, args.y_size); - auto a_mat2 = Buffer<T>(context_, args.a_size); - auto b_mat2 = Buffer<T>(context_, args.b_size); - auto c_mat2 = Buffer<T>(context_, args.c_size); - auto ap_mat2 = Buffer<T>(context_, args.ap_size); - auto scalar2 = Buffer<T>(context_, args.scalar_size); - x_vec2.Write(queue_, args.x_size, x_source_); - y_vec2.Write(queue_, args.y_size, y_source_); - a_mat2.Write(queue_, args.a_size, a_source_); - b_mat2.Write(queue_, args.b_size, b_source_); - c_mat2.Write(queue_, args.c_size, c_source_); - ap_mat2.Write(queue_, args.ap_size, ap_source_); - scalar2.Write(queue_, args.scalar_size, scalar_source_); - auto buffers2 = Buffers<T>{x_vec2, y_vec2, a_mat2, b_mat2, c_mat2, ap_mat2, scalar2}; + auto buffers2 = std::vector<Buffers<T>>(); + for (auto batch = size_t{0}; batch < args.batch_count; ++batch) { + auto x_vec2 = Buffer<T>(context_, args.x_size); + auto y_vec2 = Buffer<T>(context_, args.y_size); + auto a_mat2 = Buffer<T>(context_, args.a_size); + auto b_mat2 = Buffer<T>(context_, args.b_size); + auto c_mat2 = Buffer<T>(context_, args.c_size); + auto ap_mat2 = Buffer<T>(context_, args.ap_size); + auto scalar2 = Buffer<T>(context_, args.scalar_size); + x_vec2.Write(queue_, args.x_size, &x_source_[batch * args.x_size]); + y_vec2.Write(queue_, args.y_size, &y_source_[batch * args.y_size]); + a_mat2.Write(queue_, args.a_size, &a_source_[batch * args.a_size]); + b_mat2.Write(queue_, args.b_size, &b_source_[batch * args.b_size]); + c_mat2.Write(queue_, args.c_size, &c_source_[batch * args.c_size]); + ap_mat2.Write(queue_, args.ap_size, &ap_source_[batch * args.ap_size]); + scalar2.Write(queue_, args.scalar_size, &scalar_source_[batch * args.scalar_size]); + buffers2.push_back(Buffers<T>{x_vec2, y_vec2, a_mat2, b_mat2, c_mat2, ap_mat2, scalar2}); + } // Runs CLBlast if (verbose_) { @@ -158,21 +163,24 @@ void TestBlas<T,U>::TestRegular(std::vector<Arguments<U>> &test_vector, const st } // Set-up for the reference run - auto x_vec1 = Buffer<T>(context_, args.x_size); - auto y_vec1 = Buffer<T>(context_, args.y_size); - auto a_mat1 = Buffer<T>(context_, args.a_size); - auto b_mat1 = Buffer<T>(context_, args.b_size); - auto c_mat1 = Buffer<T>(context_, args.c_size); - auto ap_mat1 = Buffer<T>(context_, args.ap_size); - auto scalar1 = Buffer<T>(context_, args.scalar_size); - x_vec1.Write(queue_, args.x_size, x_source_); - y_vec1.Write(queue_, args.y_size, y_source_); - a_mat1.Write(queue_, args.a_size, a_source_); - b_mat1.Write(queue_, args.b_size, b_source_); - c_mat1.Write(queue_, args.c_size, c_source_); - ap_mat1.Write(queue_, args.ap_size, ap_source_); - scalar1.Write(queue_, args.scalar_size, scalar_source_); - auto buffers1 = Buffers<T>{x_vec1, y_vec1, a_mat1, b_mat1, c_mat1, ap_mat1, scalar1}; + auto buffers1 = std::vector<Buffers<T>>(); + for (auto batch = size_t{0}; batch < args.batch_count; ++batch) { + auto x_vec1 = Buffer<T>(context_, args.x_size); + auto y_vec1 = Buffer<T>(context_, args.y_size); + auto a_mat1 = Buffer<T>(context_, args.a_size); + auto b_mat1 = Buffer<T>(context_, args.b_size); + auto c_mat1 = Buffer<T>(context_, args.c_size); + auto ap_mat1 = Buffer<T>(context_, args.ap_size); + auto scalar1 = Buffer<T>(context_, args.scalar_size); + x_vec1.Write(queue_, args.x_size, &x_source_[batch * args.x_size]); + y_vec1.Write(queue_, args.y_size, &y_source_[batch * args.y_size]); + a_mat1.Write(queue_, args.a_size, &a_source_[batch * args.a_size]); + b_mat1.Write(queue_, args.b_size, &b_source_[batch * args.b_size]); + c_mat1.Write(queue_, args.c_size, &c_source_[batch * args.c_size]); + ap_mat1.Write(queue_, args.ap_size, &ap_source_[batch * args.ap_size]); + scalar1.Write(queue_, args.scalar_size, &scalar_source_[batch * args.scalar_size]); + buffers1.push_back(Buffers<T>{x_vec1, y_vec1, a_mat1, b_mat1, c_mat1, ap_mat1, scalar1}); + } // Runs the reference code if (verbose_) { @@ -189,46 +197,55 @@ void TestBlas<T,U>::TestRegular(std::vector<Arguments<U>> &test_vector, const st continue; } - // Downloads the results - auto result1 = get_result_(args, buffers1, queue_); - auto result2 = get_result_(args, buffers2, queue_); - - // Computes the L2 error - const auto kErrorMarginL2 = getL2ErrorMargin<T>(); + // Error checking for each batch + auto errors = size_t{0}; auto l2error = 0.0; - for (auto id1=size_t{0}; id1<get_id1_(args); ++id1) { - for (auto id2=size_t{0}; id2<get_id2_(args); ++id2) { - auto index = get_index_(args, id1, id2); - l2error += SquaredDifference(result1[index], result2[index]); + for (auto batch = size_t{0}; batch < args.batch_count; ++batch) { + + // Downloads the results + auto result1 = get_result_(args, buffers1[batch], queue_); + auto result2 = get_result_(args, buffers2[batch], queue_); + + // Computes the L2 error + auto l2error_batch = 0.0; + const auto kErrorMarginL2 = getL2ErrorMargin<T>(); + for (auto id1=size_t{0}; id1<get_id1_(args); ++id1) { + for (auto id2=size_t{0}; id2<get_id2_(args); ++id2) { + auto index = get_index_(args, id1, id2); + l2error_batch += SquaredDifference(result1[index], result2[index]); + } } - } - l2error /= (get_id1_(args) * get_id2_(args)); - - // Checks for differences in the output - auto errors = size_t{0}; - for (auto id1=size_t{0}; id1<get_id1_(args); ++id1) { - for (auto id2=size_t{0}; id2<get_id2_(args); ++id2) { - auto index = get_index_(args, id1, id2); - if (!TestSimilarity(result1[index], result2[index])) { - if (l2error >= kErrorMarginL2) { errors++; } - if (verbose_) { - if (get_id2_(args) == 1) { fprintf(stdout, "\n Error at index %zu: ", id1); } - else { fprintf(stdout, "\n Error at %zu,%zu: ", id1, id2); } - fprintf(stdout, " %s (reference) versus ", ToString(result1[index]).c_str()); - fprintf(stdout, " %s (CLBlast)", ToString(result2[index]).c_str()); - if (l2error < kErrorMarginL2) { - fprintf(stdout, " - error suppressed by a low total L2 error\n"); + l2error_batch /= static_cast<double>(get_id1_(args) * get_id2_(args)); + l2error += l2error_batch; + + // Checks for differences in the output + for (auto id1=size_t{0}; id1<get_id1_(args); ++id1) { + for (auto id2=size_t{0}; id2<get_id2_(args); ++id2) { + auto index = get_index_(args, id1, id2); + if (!TestSimilarity(result1[index], result2[index])) { + if (l2error_batch >= kErrorMarginL2) { errors++; } + if (verbose_) { + if (get_id2_(args) == 1) { fprintf(stdout, "\n Error at index %zu: ", id1); } + else { fprintf(stdout, "\n Error at %zu,%zu: ", id1, id2); } + fprintf(stdout, " %s (reference) versus ", ToString(result1[index]).c_str()); + fprintf(stdout, " %s (CLBlast)", ToString(result2[index]).c_str()); + if (l2error_batch < kErrorMarginL2) { + fprintf(stdout, " - error suppressed by a low total L2 error\n"); + } } } } } } + l2error /= static_cast<double>(args.batch_count); + + // Report the results if (verbose_ && errors > 0) { - fprintf(stdout, "\n Combined L2 error: %.2e\n ", l2error); + fprintf(stdout, "\n Combined average L2 error: %.2e\n ", l2error); } // Tests the error count (should be zero) - TestErrorCount(errors, get_id1_(args)*get_id2_(args), args); + TestErrorCount(errors, get_id1_(args)*get_id2_(args)*args.batch_count, args); } TestEnd(); } @@ -255,36 +272,40 @@ void TestBlas<T,U>::TestInvalid(std::vector<Arguments<U>> &test_vector, const st // Creates the OpenCL buffers. Note: we are not using the C++ version since we explicitly // want to be able to create invalid buffers (no error checking here). - auto x1 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.x_size*sizeof(T), nullptr,nullptr); - auto y1 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.y_size*sizeof(T), nullptr,nullptr); - auto a1 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.a_size*sizeof(T), nullptr,nullptr); - auto b1 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.b_size*sizeof(T), nullptr,nullptr); - auto c1 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.c_size*sizeof(T), nullptr,nullptr); - auto ap1 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.ap_size*sizeof(T), nullptr,nullptr); - auto d1 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.scalar_size*sizeof(T), nullptr,nullptr); - auto x_vec1 = Buffer<T>(x1); - auto y_vec1 = Buffer<T>(y1); - auto a_mat1 = Buffer<T>(a1); - auto b_mat1 = Buffer<T>(b1); - auto c_mat1 = Buffer<T>(c1); - auto ap_mat1 = Buffer<T>(ap1); - auto scalar1 = Buffer<T>(d1); - auto x2 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.x_size*sizeof(T), nullptr,nullptr); - auto y2 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.y_size*sizeof(T), nullptr,nullptr); - auto a2 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.a_size*sizeof(T), nullptr,nullptr); - auto b2 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.b_size*sizeof(T), nullptr,nullptr); - auto c2 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.c_size*sizeof(T), nullptr,nullptr); - auto ap2 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.ap_size*sizeof(T), nullptr,nullptr); - auto d2 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.scalar_size*sizeof(T), nullptr,nullptr); - auto x_vec2 = Buffer<T>(x2); - auto y_vec2 = Buffer<T>(y2); - auto a_mat2 = Buffer<T>(a2); - auto b_mat2 = Buffer<T>(b2); - auto c_mat2 = Buffer<T>(c2); - auto ap_mat2 = Buffer<T>(ap2); - auto scalar2 = Buffer<T>(d2); - auto buffers1 = Buffers<T>{x_vec1, y_vec1, a_mat1, b_mat1, c_mat1, ap_mat1, scalar1}; - auto buffers2 = Buffers<T>{x_vec2, y_vec2, a_mat2, b_mat2, c_mat2, ap_mat2, scalar2}; + auto buffers1 = std::vector<Buffers<T>>(); + auto buffers2 = std::vector<Buffers<T>>(); + for (auto batch = size_t{0}; batch < args.batch_count; ++batch) { + auto x1 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.x_size*sizeof(T), nullptr,nullptr); + auto y1 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.y_size*sizeof(T), nullptr,nullptr); + auto a1 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.a_size*sizeof(T), nullptr,nullptr); + auto b1 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.b_size*sizeof(T), nullptr,nullptr); + auto c1 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.c_size*sizeof(T), nullptr,nullptr); + auto ap1 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.ap_size*sizeof(T), nullptr,nullptr); + auto d1 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.scalar_size*sizeof(T), nullptr,nullptr); + auto x_vec1 = Buffer<T>(x1); + auto y_vec1 = Buffer<T>(y1); + auto a_mat1 = Buffer<T>(a1); + auto b_mat1 = Buffer<T>(b1); + auto c_mat1 = Buffer<T>(c1); + auto ap_mat1 = Buffer<T>(ap1); + auto scalar1 = Buffer<T>(d1); + auto x2 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.x_size*sizeof(T), nullptr,nullptr); + auto y2 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.y_size*sizeof(T), nullptr,nullptr); + auto a2 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.a_size*sizeof(T), nullptr,nullptr); + auto b2 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.b_size*sizeof(T), nullptr,nullptr); + auto c2 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.c_size*sizeof(T), nullptr,nullptr); + auto ap2 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.ap_size*sizeof(T), nullptr,nullptr); + auto d2 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.scalar_size*sizeof(T), nullptr,nullptr); + auto x_vec2 = Buffer<T>(x2); + auto y_vec2 = Buffer<T>(y2); + auto a_mat2 = Buffer<T>(a2); + auto b_mat2 = Buffer<T>(b2); + auto c_mat2 = Buffer<T>(c2); + auto ap_mat2 = Buffer<T>(ap2); + auto scalar2 = Buffer<T>(d2); + buffers1.push_back(Buffers<T>{x_vec1, y_vec1, a_mat1, b_mat1, c_mat1, ap_mat1, scalar1}); + buffers2.push_back(Buffers<T>{x_vec2, y_vec2, a_mat2, b_mat2, c_mat2, ap_mat2, scalar2}); + } // Runs CLBlast if (verbose_) { diff --git a/test/correctness/testblas.hpp b/test/correctness/testblas.hpp index ee795aad..e675fa9b 100644 --- a/test/correctness/testblas.hpp +++ b/test/correctness/testblas.hpp @@ -56,6 +56,7 @@ class TestBlas: public Tester<T,U> { static const std::vector<size_t> kMatrixDims; static const std::vector<size_t> kMatrixVectorDims; static const std::vector<size_t> kBandSizes; + static const std::vector<size_t> kBatchCounts; const std::vector<size_t> kOffsets; const std::vector<U> kAlphaValues; const std::vector<U> kBetaValues; @@ -78,7 +79,7 @@ class TestBlas: public Tester<T,U> { std::vector<T>&, std::vector<T>&, std::vector<T>&, std::vector<T>&, std::vector<T>&, std::vector<T>&, std::vector<T>&)>; - using Routine = std::function<StatusCode(const Arguments<U>&, Buffers<T>&, Queue&)>; + using Routine = std::function<StatusCode(const Arguments<U>&, std::vector<Buffers<T>>&, Queue&)>; using ResultGet = std::function<std::vector<T>(const Arguments<U>&, Buffers<T>&, Queue&)>; using ResultIndex = std::function<size_t(const Arguments<U>&, const size_t, const size_t)>; using ResultIterator = std::function<size_t(const Arguments<U>&)>; @@ -183,6 +184,7 @@ size_t RunTests(int argc, char *argv[], const bool silent, const std::string &na auto imax_offsets = std::vector<size_t>{args.imax_offset}; auto alphas = std::vector<U>{args.alpha}; auto betas = std::vector<U>{args.beta}; + auto batch_counts = std::vector<size_t>{args.batch_count}; auto x_sizes = std::vector<size_t>{args.x_size}; auto y_sizes = std::vector<size_t>{args.y_size}; auto a_sizes = std::vector<size_t>{args.a_size}; @@ -226,6 +228,7 @@ size_t RunTests(int argc, char *argv[], const bool silent, const std::string &na if (option == kArgImaxOffset) { imax_offsets = tester.kOffsets; } if (option == kArgAlpha) { alphas = tester.kAlphaValues; } if (option == kArgBeta) { betas = tester.kBetaValues; } + if (option == kArgBatchCount) { batch_counts = tester.kBatchCounts; } if (option == kArgXOffset) { x_sizes = tester.kVecSizes; } if (option == kArgYOffset) { y_sizes = tester.kVecSizes; } @@ -268,8 +271,10 @@ size_t RunTests(int argc, char *argv[], const bool silent, const std::string &na for (auto &imax_offset: imax_offsets) { r_args.imax_offset = imax_offset; for (auto &alpha: alphas) { r_args.alpha = alpha; for (auto &beta: betas) { r_args.beta = beta; - C::SetSizes(r_args); - regular_test_vector.push_back(r_args); + for (auto &batch_count: batch_counts) { r_args.batch_count = batch_count; + C::SetSizes(r_args); + regular_test_vector.push_back(r_args); + } } } } diff --git a/test/correctness/tester.cpp b/test/correctness/tester.cpp index cbfc5bb2..40784fdb 100644 --- a/test/correctness/tester.cpp +++ b/test/correctness/tester.cpp @@ -367,6 +367,7 @@ std::string Tester<T,U>::GetOptionsString(const Arguments<U> &args) { if (o == kArgDotOffset){ result += kArgDotOffset + equals + ToString(args.dot_offset) + " "; } if (o == kArgAlpha) { result += kArgAlpha + equals + ToString(args.alpha) + " "; } if (o == kArgBeta) { result += kArgBeta + equals + ToString(args.beta) + " "; } + if (o == kArgBatchCount){result += kArgBatchCount + equals + ToString(args.batch_count) + " "; } } return result; } |