From fa0a9c689fc21a2a24aeadf82ae0acdf6d8bf831 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Wed, 8 Mar 2017 20:10:20 +0100 Subject: Make batched routines based on offsets instead of a vector of cl_mem objects - undoing many earlier changes --- test/performance/client.cpp | 53 +++++++++++++++++++++------------------------ test/performance/client.hpp | 4 ++-- 2 files changed, 27 insertions(+), 30 deletions(-) (limited to 'test/performance') diff --git a/test/performance/client.cpp b/test/performance/client.cpp index 2b4cdb9b..bd48b047 100644 --- a/test/performance/client.cpp +++ b/test/performance/client.cpp @@ -177,13 +177,13 @@ void Client::PerformanceTest(Arguments &args, const SetMetric set_sizes) set_sizes(args); // Populates input host matrices with random data - std::vector x_source(args.batch_count * args.x_size); - std::vector y_source(args.batch_count * args.y_size); - std::vector a_source(args.batch_count * args.a_size); - std::vector b_source(args.batch_count * args.b_size); - std::vector c_source(args.batch_count * args.c_size); - std::vector ap_source(args.batch_count * args.ap_size); - std::vector scalar_source(args.batch_count * args.scalar_size); + std::vector x_source(args.x_size); + std::vector y_source(args.y_size); + std::vector a_source(args.a_size); + std::vector b_source(args.b_size); + std::vector c_source(args.c_size); + std::vector ap_source(args.ap_size); + std::vector scalar_source(args.scalar_size); std::mt19937 mt(kSeed); std::uniform_real_distribution dist(kTestDataLowerLimit, kTestDataUpperLimit); PopulateVector(x_source, mt, dist); @@ -195,24 +195,21 @@ void Client::PerformanceTest(Arguments &args, const SetMetric set_sizes) PopulateVector(scalar_source, mt, dist); // Creates the matrices on the device - auto buffers = std::vector>(); - for (auto batch = size_t{0}; batch < args.batch_count; ++batch) { - auto x_vec = Buffer(context, args.x_size); - auto y_vec = Buffer(context, args.y_size); - auto a_mat = Buffer(context, args.a_size); - auto b_mat = Buffer(context, args.b_size); - auto c_mat = Buffer(context, args.c_size); - auto ap_mat = Buffer(context, args.ap_size); - auto scalar = Buffer(context, args.scalar_size); - x_vec.Write(queue, args.x_size, &x_source[batch * args.x_size]); - y_vec.Write(queue, args.y_size, &y_source[batch * args.y_size]); - a_mat.Write(queue, args.a_size, &a_source[batch * args.a_size]); - b_mat.Write(queue, args.b_size, &b_source[batch * args.b_size]); - c_mat.Write(queue, args.c_size, &c_source[batch * args.c_size]); - ap_mat.Write(queue, args.ap_size, &ap_source[batch * args.ap_size]); - scalar.Write(queue, args.scalar_size, &scalar_source[batch * args.scalar_size]); - buffers.push_back(Buffers{x_vec, y_vec, a_mat, b_mat, c_mat, ap_mat, scalar}); - } + auto x_vec = Buffer(context, args.x_size); + auto y_vec = Buffer(context, args.y_size); + auto a_mat = Buffer(context, args.a_size); + auto b_mat = Buffer(context, args.b_size); + auto c_mat = Buffer(context, args.c_size); + auto ap_mat = Buffer(context, args.ap_size); + auto scalar = Buffer(context, args.scalar_size); + x_vec.Write(queue, args.x_size, x_source); + y_vec.Write(queue, args.y_size, y_source); + a_mat.Write(queue, args.a_size, a_source); + b_mat.Write(queue, args.b_size, b_source); + c_mat.Write(queue, args.c_size, c_source); + ap_mat.Write(queue, args.ap_size, ap_source); + scalar.Write(queue, args.scalar_size, scalar_source); + auto buffers = Buffers{x_vec, y_vec, a_mat, b_mat, c_mat, ap_mat, scalar}; // Runs the routines and collects the timings auto timings = std::vector>(); @@ -254,7 +251,7 @@ void Client::PerformanceTest(Arguments &args, const SetMetric set_sizes) // value found in the vector of timing results. The return value is in milliseconds. template double Client::TimedExecution(const size_t num_runs, const Arguments &args, - std::vector> &buffers, Queue &queue, + Buffers &buffers, Queue &queue, Routine run_blas, const std::string &library_name) { auto status = StatusCode::kSuccess; @@ -373,8 +370,8 @@ void Client::PrintTableRow(const Arguments& args, for (const auto& timing : timings) { // Computes the GFLOPS and GB/s metrics - auto flops = get_flops_(args) * args.batch_count; - auto bytes = get_bytes_(args) * args.batch_count; + auto flops = get_flops_(args); + auto bytes = get_bytes_(args); auto gflops = (timing.second != 0.0) ? (flops*1e-6)/timing.second : 0; auto gbs = (timing.second != 0.0) ? (bytes*1e-6)/timing.second : 0; diff --git a/test/performance/client.hpp b/test/performance/client.hpp index a8e31419..4b3e17c7 100644 --- a/test/performance/client.hpp +++ b/test/performance/client.hpp @@ -43,7 +43,7 @@ class Client { static constexpr auto kSeed = 42; // fixed seed for reproducibility // Shorthand for the routine-specific functions passed to the tester - using Routine = std::function&, std::vector>&, Queue&)>; + using Routine = std::function&, Buffers&, Queue&)>; using SetMetric = std::function&)>; using GetMetric = std::function&)>; @@ -66,7 +66,7 @@ class Client { private: // Runs a function a given number of times and returns the execution time of the shortest instance - double TimedExecution(const size_t num_runs, const Arguments &args, std::vector> &buffers, + double TimedExecution(const size_t num_runs, const Arguments &args, Buffers &buffers, Queue &queue, Routine run_blas, const std::string &library_name); // Prints the header of a performance-data table -- cgit v1.2.3