diff options
Diffstat (limited to 'test/performance')
-rw-r--r-- | test/performance/client.cc | 28 | ||||
-rw-r--r-- | test/performance/client.h | 9 |
2 files changed, 17 insertions, 20 deletions
diff --git a/test/performance/client.cc b/test/performance/client.cc index 676e88e4..893bb55d 100644 --- a/test/performance/client.cc +++ b/test/performance/client.cc @@ -110,9 +110,9 @@ void Client<T,U>::PerformanceTest(Arguments<U> &args, const SetMetric set_sizes) // Initializes OpenCL and the libraries auto platform = Platform(args.platform_id); - auto device = Device(platform, kDeviceType, args.device_id); + auto device = Device(platform, args.device_id); auto context = Context(device); - auto queue = CommandQueue(context, device); + auto queue = Queue(context, device); if (args.compare_clblas) { clblasSetup(); } // Iterates over all "num_step" values jumping by "step" each time @@ -135,17 +135,17 @@ void Client<T,U>::PerformanceTest(Arguments<U> &args, const SetMetric set_sizes) PopulateVector(c_source); // Creates the matrices on the device - auto x_vec = Buffer(context, CL_MEM_READ_WRITE, args.x_size*sizeof(T)); - auto y_vec = Buffer(context, CL_MEM_READ_WRITE, args.y_size*sizeof(T)); - auto a_mat = Buffer(context, CL_MEM_READ_WRITE, args.a_size*sizeof(T)); - auto b_mat = Buffer(context, CL_MEM_READ_WRITE, args.b_size*sizeof(T)); - auto c_mat = Buffer(context, CL_MEM_READ_WRITE, args.c_size*sizeof(T)); - x_vec.WriteBuffer(queue, args.x_size*sizeof(T), x_source); - y_vec.WriteBuffer(queue, args.y_size*sizeof(T), y_source); - a_mat.WriteBuffer(queue, args.a_size*sizeof(T), a_source); - b_mat.WriteBuffer(queue, args.b_size*sizeof(T), b_source); - c_mat.WriteBuffer(queue, args.c_size*sizeof(T), c_source); - auto buffers = Buffers{x_vec, y_vec, a_mat, b_mat, c_mat}; + auto x_vec = Buffer<T>(context, args.x_size); + auto y_vec = Buffer<T>(context, args.y_size); + auto a_mat = Buffer<T>(context, args.a_size); + auto b_mat = Buffer<T>(context, args.b_size); + auto c_mat = Buffer<T>(context, args.c_size); + x_vec.Write(queue, args.x_size, x_source); + y_vec.Write(queue, args.y_size, y_source); + a_mat.Write(queue, args.a_size, a_source); + b_mat.Write(queue, args.b_size, b_source); + c_mat.Write(queue, args.c_size, c_source); + auto buffers = Buffers<T>{x_vec, y_vec, a_mat, b_mat, c_mat}; // Runs the routines and collects the timings auto ms_clblast = TimedExecution(args.num_runs, args, buffers, queue, run_routine_, "CLBlast"); @@ -176,7 +176,7 @@ void Client<T,U>::PerformanceTest(Arguments<U> &args, const SetMetric set_sizes) // value found in the vector of timing results. The return value is in milliseconds. template <typename T, typename U> double Client<T,U>::TimedExecution(const size_t num_runs, const Arguments<U> &args, - const Buffers &buffers, CommandQueue &queue, + const Buffers<T> &buffers, Queue &queue, Routine run_blas, const std::string &library_name) { auto timings = std::vector<double>(num_runs); for (auto &timing: timings) { diff --git a/test/performance/client.h b/test/performance/client.h index c9095967..9f6852d0 100644 --- a/test/performance/client.h +++ b/test/performance/client.h @@ -38,11 +38,8 @@ template <typename T, typename U> class Client { public: - // Types of devices to consider - const cl_device_type kDeviceType = CL_DEVICE_TYPE_ALL; - // Shorthand for the routine-specific functions passed to the tester - using Routine = std::function<StatusCode(const Arguments<U>&, const Buffers&, CommandQueue&)>; + using Routine = std::function<StatusCode(const Arguments<U>&, const Buffers<T>&, Queue&)>; using SetMetric = std::function<void(Arguments<U>&)>; using GetMetric = std::function<size_t(const Arguments<U>&)>; @@ -63,8 +60,8 @@ class Client { private: // Runs a function a given number of times and returns the execution time of the shortest instance - double TimedExecution(const size_t num_runs, const Arguments<U> &args, const Buffers &buffers, - CommandQueue &queue, Routine run_blas, const std::string &library_name); + double TimedExecution(const size_t num_runs, const Arguments<U> &args, const Buffers<T> &buffers, + Queue &queue, Routine run_blas, const std::string &library_name); // Prints the header of a performance-data table void PrintTableHeader(const bool silent, const std::vector<std::string> &args); |