From fa0a9c689fc21a2a24aeadf82ae0acdf6d8bf831 Mon Sep 17 00:00:00 2001
From: Cedric Nugteren <web@cedricnugteren.nl>
Date: Wed, 8 Mar 2017 20:10:20 +0100
Subject: Make batched routines based on offsets instead of a vector of cl_mem
 objects - undoing many earlier changes

---
 test/performance/client.cpp | 53 +++++++++++++++++++++------------------------
 test/performance/client.hpp |  4 ++--
 2 files changed, 27 insertions(+), 30 deletions(-)

(limited to 'test/performance')
diff --git a/test/performance/client.cpp b/test/performance/client.cpp
index 2b4cdb9b..bd48b047 100644
--- a/test/performance/client.cpp
+++ b/test/performance/client.cpp
@@ -177,13 +177,13 @@ void Client<T,U>::PerformanceTest(Arguments<U> &args, const SetMetric set_sizes)
     set_sizes(args);
 
     // Populates input host matrices with random data
-    std::vector<T> x_source(args.batch_count * args.x_size);
-    std::vector<T> y_source(args.batch_count * args.y_size);
-    std::vector<T> a_source(args.batch_count * args.a_size);
-    std::vector<T> b_source(args.batch_count * args.b_size);
-    std::vector<T> c_source(args.batch_count * args.c_size);
-    std::vector<T> ap_source(args.batch_count * args.ap_size);
-    std::vector<T> scalar_source(args.batch_count * args.scalar_size);
+    std::vector<T> x_source(args.x_size);
+    std::vector<T> y_source(args.y_size);
+    std::vector<T> a_source(args.a_size);
+    std::vector<T> b_source(args.b_size);
+    std::vector<T> c_source(args.c_size);
+    std::vector<T> ap_source(args.ap_size);
+    std::vector<T> scalar_source(args.scalar_size);
     std::mt19937 mt(kSeed);
     std::uniform_real_distribution<double> dist(kTestDataLowerLimit, kTestDataUpperLimit);
     PopulateVector(x_source, mt, dist);
@@ -195,24 +195,21 @@ void Client<T,U>::PerformanceTest(Arguments<U> &args, const SetMetric set_sizes)
     PopulateVector(scalar_source, mt, dist);
 
     // Creates the matrices on the device
-    auto buffers = std::vector<Buffers<T>>();
-    for (auto batch = size_t{0}; batch < args.batch_count; ++batch) {
-      auto x_vec = Buffer<T>(context, args.x_size);
-      auto y_vec = Buffer<T>(context, args.y_size);
-      auto a_mat = Buffer<T>(context, args.a_size);
-      auto b_mat = Buffer<T>(context, args.b_size);
-      auto c_mat = Buffer<T>(context, args.c_size);
-      auto ap_mat = Buffer<T>(context, args.ap_size);
-      auto scalar = Buffer<T>(context, args.scalar_size);
-      x_vec.Write(queue, args.x_size, &x_source[batch * args.x_size]);
-      y_vec.Write(queue, args.y_size, &y_source[batch * args.y_size]);
-      a_mat.Write(queue, args.a_size, &a_source[batch * args.a_size]);
-      b_mat.Write(queue, args.b_size, &b_source[batch * args.b_size]);
-      c_mat.Write(queue, args.c_size, &c_source[batch * args.c_size]);
-      ap_mat.Write(queue, args.ap_size, &ap_source[batch * args.ap_size]);
-      scalar.Write(queue, args.scalar_size, &scalar_source[batch * args.scalar_size]);
-      buffers.push_back(Buffers<T>{x_vec, y_vec, a_mat, b_mat, c_mat, ap_mat, scalar});
-    }
+    auto x_vec = Buffer<T>(context, args.x_size);
+    auto y_vec = Buffer<T>(context, args.y_size);
+    auto a_mat = Buffer<T>(context, args.a_size);
+    auto b_mat = Buffer<T>(context, args.b_size);
+    auto c_mat = Buffer<T>(context, args.c_size);
+    auto ap_mat = Buffer<T>(context, args.ap_size);
+    auto scalar = Buffer<T>(context, args.scalar_size);
+    x_vec.Write(queue, args.x_size, x_source);
+    y_vec.Write(queue, args.y_size, y_source);
+    a_mat.Write(queue, args.a_size, a_source);
+    b_mat.Write(queue, args.b_size, b_source);
+    c_mat.Write(queue, args.c_size, c_source);
+    ap_mat.Write(queue, args.ap_size, ap_source);
+    scalar.Write(queue, args.scalar_size, scalar_source);
+    auto buffers = Buffers<T>{x_vec, y_vec, a_mat, b_mat, c_mat, ap_mat, scalar};
 
     // Runs the routines and collects the timings
     auto timings = std::vector<std::pair<std::string, double>>();
@@ -254,7 +251,7 @@ void Client<T,U>::PerformanceTest(Arguments<U> &args, const SetMetric set_sizes)
 // value found in the vector of timing results. The return value is in milliseconds.
 template <typename T, typename U>
 double Client<T,U>::TimedExecution(const size_t num_runs, const Arguments<U> &args,
-                                   std::vector<Buffers<T>> &buffers, Queue &queue,
+                                   Buffers<T> &buffers, Queue &queue,
                                    Routine run_blas, const std::string &library_name) {
   auto status = StatusCode::kSuccess;
 
@@ -373,8 +370,8 @@ void Client<T,U>::PrintTableRow(const Arguments<U>& args,
   for (const auto& timing : timings) {
 
     // Computes the GFLOPS and GB/s metrics
-    auto flops = get_flops_(args) * args.batch_count;
-    auto bytes = get_bytes_(args) * args.batch_count;
+    auto flops = get_flops_(args);
+    auto bytes = get_bytes_(args);
     auto gflops = (timing.second != 0.0) ? (flops*1e-6)/timing.second : 0;
     auto gbs = (timing.second != 0.0) ? (bytes*1e-6)/timing.second : 0;
 
diff --git a/test/performance/client.hpp b/test/performance/client.hpp
index a8e31419..4b3e17c7 100644
--- a/test/performance/client.hpp
+++ b/test/performance/client.hpp
@@ -43,7 +43,7 @@ class Client {
   static constexpr auto kSeed = 42; // fixed seed for reproducibility
 
   // Shorthand for the routine-specific functions passed to the tester
-  using Routine = std::function<StatusCode(const Arguments<U>&, std::vector<Buffers<T>>&, Queue&)>;
+  using Routine = std::function<StatusCode(const Arguments<U>&, Buffers<T>&, Queue&)>;
   using SetMetric = std::function<void(Arguments<U>&)>;
   using GetMetric = std::function<size_t(const Arguments<U>&)>;
 
@@ -66,7 +66,7 @@ class Client {
  private:
 
   // Runs a function a given number of times and returns the execution time of the shortest instance
-  double TimedExecution(const size_t num_runs, const Arguments<U> &args, std::vector<Buffers<T>> &buffers,
+  double TimedExecution(const size_t num_runs, const Arguments<U> &args, Buffers<T> &buffers,
                         Queue &queue, Routine run_blas, const std::string &library_name);
 
   // Prints the header of a performance-data table
-- 
cgit v1.2.3