diff options
Diffstat (limited to 'test')
-rw-r--r-- | test/correctness/testblas.cc | 68 | ||||
-rw-r--r-- | test/correctness/testblas.h | 4 | ||||
-rw-r--r-- | test/correctness/tester.cc | 8 | ||||
-rw-r--r-- | test/correctness/tester.h | 5 | ||||
-rw-r--r-- | test/performance/client.cc | 28 | ||||
-rw-r--r-- | test/performance/client.h | 9 | ||||
-rw-r--r-- | test/routines/level1/xaxpy.h | 11 | ||||
-rw-r--r-- | test/routines/level2/xgemv.h | 11 | ||||
-rw-r--r-- | test/routines/level3/xgemm.h | 11 | ||||
-rw-r--r-- | test/routines/level3/xhemm.h | 11 | ||||
-rw-r--r-- | test/routines/level3/xher2k.h | 11 | ||||
-rw-r--r-- | test/routines/level3/xherk.h | 11 | ||||
-rw-r--r-- | test/routines/level3/xsymm.h | 11 | ||||
-rw-r--r-- | test/routines/level3/xsyr2k.h | 11 | ||||
-rw-r--r-- | test/routines/level3/xsyrk.h | 11 | ||||
-rw-r--r-- | test/routines/level3/xtrmm.h | 11 |
16 files changed, 98 insertions, 134 deletions
diff --git a/test/correctness/testblas.cc b/test/correctness/testblas.cc index 5951b177..ff81f4c3 100644 --- a/test/correctness/testblas.cc +++ b/test/correctness/testblas.cc @@ -76,31 +76,31 @@ void TestBlas<T,U>::TestRegular(std::vector<Arguments<U>> &test_vector, const st for (auto &args: test_vector) { // Runs the reference clBLAS code - auto x_vec1 = Buffer(context_, CL_MEM_READ_WRITE, args.x_size*sizeof(T)); - auto y_vec1 = Buffer(context_, CL_MEM_READ_WRITE, args.y_size*sizeof(T)); - auto a_mat1 = Buffer(context_, CL_MEM_READ_WRITE, args.a_size*sizeof(T)); - auto b_mat1 = Buffer(context_, CL_MEM_READ_WRITE, args.b_size*sizeof(T)); - auto c_mat1 = Buffer(context_, CL_MEM_READ_WRITE, args.c_size*sizeof(T)); - x_vec1.WriteBuffer(queue_, args.x_size*sizeof(T), x_source_); - y_vec1.WriteBuffer(queue_, args.y_size*sizeof(T), y_source_); - a_mat1.WriteBuffer(queue_, args.a_size*sizeof(T), a_source_); - b_mat1.WriteBuffer(queue_, args.b_size*sizeof(T), b_source_); - c_mat1.WriteBuffer(queue_, args.c_size*sizeof(T), c_source_); - auto buffers1 = Buffers{x_vec1, y_vec1, a_mat1, b_mat1, c_mat1}; + auto x_vec1 = Buffer<T>(context_, args.x_size); + auto y_vec1 = Buffer<T>(context_, args.y_size); + auto a_mat1 = Buffer<T>(context_, args.a_size); + auto b_mat1 = Buffer<T>(context_, args.b_size); + auto c_mat1 = Buffer<T>(context_, args.c_size); + x_vec1.Write(queue_, args.x_size, x_source_); + y_vec1.Write(queue_, args.y_size, y_source_); + a_mat1.Write(queue_, args.a_size, a_source_); + b_mat1.Write(queue_, args.b_size, b_source_); + c_mat1.Write(queue_, args.c_size, c_source_); + auto buffers1 = Buffers<T>{x_vec1, y_vec1, a_mat1, b_mat1, c_mat1}; auto status1 = run_reference_(args, buffers1, queue_); // Runs the CLBlast code - auto x_vec2 = Buffer(context_, CL_MEM_READ_WRITE, args.x_size*sizeof(T)); - auto y_vec2 = Buffer(context_, CL_MEM_READ_WRITE, args.y_size*sizeof(T)); - auto a_mat2 = Buffer(context_, CL_MEM_READ_WRITE, args.a_size*sizeof(T)); - auto b_mat2 = Buffer(context_, CL_MEM_READ_WRITE, args.b_size*sizeof(T)); - auto c_mat2 = Buffer(context_, CL_MEM_READ_WRITE, args.c_size*sizeof(T)); - x_vec2.WriteBuffer(queue_, args.x_size*sizeof(T), x_source_); - y_vec2.WriteBuffer(queue_, args.y_size*sizeof(T), y_source_); - a_mat2.WriteBuffer(queue_, args.a_size*sizeof(T), a_source_); - b_mat2.WriteBuffer(queue_, args.b_size*sizeof(T), b_source_); - c_mat2.WriteBuffer(queue_, args.c_size*sizeof(T), c_source_); - auto buffers2 = Buffers{x_vec2, y_vec2, a_mat2, b_mat2, c_mat2}; + auto x_vec2 = Buffer<T>(context_, args.x_size); + auto y_vec2 = Buffer<T>(context_, args.y_size); + auto a_mat2 = Buffer<T>(context_, args.a_size); + auto b_mat2 = Buffer<T>(context_, args.b_size); + auto c_mat2 = Buffer<T>(context_, args.c_size); + x_vec2.Write(queue_, args.x_size, x_source_); + y_vec2.Write(queue_, args.y_size, y_source_); + a_mat2.Write(queue_, args.a_size, a_source_); + b_mat2.Write(queue_, args.b_size, b_source_); + c_mat2.Write(queue_, args.c_size, c_source_); + auto buffers2 = Buffers<T>{x_vec2, y_vec2, a_mat2, b_mat2, c_mat2}; auto status2 = run_routine_(args, buffers2, queue_); // Tests for equality of the two status codes @@ -149,25 +149,25 @@ void TestBlas<T,U>::TestInvalid(std::vector<Arguments<U>> &test_vector, const st auto a1 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.a_size*sizeof(T), nullptr,nullptr); auto b1 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.b_size*sizeof(T), nullptr,nullptr); auto c1 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.c_size*sizeof(T), nullptr,nullptr); - auto x_vec1 = Buffer(x1); - auto y_vec1 = Buffer(y1); - auto a_mat1 = Buffer(a1); - auto b_mat1 = Buffer(b1); - auto c_mat1 = Buffer(c1); + auto x_vec1 = Buffer<T>(x1); + auto y_vec1 = Buffer<T>(y1); + auto a_mat1 = Buffer<T>(a1); + auto b_mat1 = Buffer<T>(b1); + auto c_mat1 = Buffer<T>(c1); auto x2 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.x_size*sizeof(T), nullptr,nullptr); auto y2 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.y_size*sizeof(T), nullptr,nullptr); auto a2 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.a_size*sizeof(T), nullptr,nullptr); auto b2 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.b_size*sizeof(T), nullptr,nullptr); auto c2 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.c_size*sizeof(T), nullptr,nullptr); - auto x_vec2 = Buffer(x2); - auto y_vec2 = Buffer(y2); - auto a_mat2 = Buffer(a2); - auto b_mat2 = Buffer(b2); - auto c_mat2 = Buffer(c2); + auto x_vec2 = Buffer<T>(x2); + auto y_vec2 = Buffer<T>(y2); + auto a_mat2 = Buffer<T>(a2); + auto b_mat2 = Buffer<T>(b2); + auto c_mat2 = Buffer<T>(c2); // Runs the two routines - auto status1 = run_reference_(args, Buffers{x_vec1, y_vec1, a_mat1, b_mat1, c_mat1}, queue_); - auto status2 = run_routine_(args, Buffers{x_vec2, y_vec2, a_mat2, b_mat2, c_mat2}, queue_); + auto status1 = run_reference_(args, Buffers<T>{x_vec1, y_vec1, a_mat1, b_mat1, c_mat1}, queue_); + auto status2 = run_routine_(args, Buffers<T>{x_vec2, y_vec2, a_mat2, b_mat2, c_mat2}, queue_); // Tests for equality of the two status codes TestErrorCodes(status1, status2, args); diff --git a/test/correctness/testblas.h b/test/correctness/testblas.h index 96c140c1..af8a4d0e 100644 --- a/test/correctness/testblas.h +++ b/test/correctness/testblas.h @@ -66,8 +66,8 @@ class TestBlas: public Tester<T,U> { static const std::vector<Transpose> kTransposes; // Data-type dependent, see .cc-file // Shorthand for the routine-specific functions passed to the tester - using Routine = std::function<StatusCode(const Arguments<U>&, const Buffers&, CommandQueue&)>; - using ResultGet = std::function<std::vector<T>(const Arguments<U>&, Buffers&, CommandQueue&)>; + using Routine = std::function<StatusCode(const Arguments<U>&, const Buffers<T>&, Queue&)>; + using ResultGet = std::function<std::vector<T>(const Arguments<U>&, Buffers<T>&, Queue&)>; using ResultIndex = std::function<size_t(const Arguments<U>&, const size_t, const size_t)>; using ResultIterator = std::function<size_t(const Arguments<U>&)>; diff --git a/test/correctness/tester.cc b/test/correctness/tester.cc index 378968ed..002cb1a6 100644 --- a/test/correctness/tester.cc +++ b/test/correctness/tester.cc @@ -28,9 +28,9 @@ Tester<T,U>::Tester(int argc, char *argv[], const bool silent, const std::string &name, const std::vector<std::string> &options): help_("Options given/available:\n"), platform_(Platform(GetArgument(argc, argv, help_, kArgPlatform, size_t{0}))), - device_(Device(platform_, kDeviceType, GetArgument(argc, argv, help_, kArgDevice, size_t{0}))), + device_(Device(platform_, GetArgument(argc, argv, help_, kArgDevice, size_t{0}))), context_(Context(device_)), - queue_(CommandQueue(context_, device_)), + queue_(Queue(context_, device_)), full_test_(CheckArgument(argc, argv, help_, kArgFullTest)), error_log_{}, num_passed_{0}, @@ -339,11 +339,11 @@ template <> const std::vector<double2> GetExampleScalars(const bool full_test) { template <> bool PrecisionSupported<float>(const Device &) { return true; } template <> bool PrecisionSupported<float2>(const Device &) { return true; } template <> bool PrecisionSupported<double>(const Device &device) { - auto extensions = device.Extensions(); + auto extensions = device.Capabilities(); return (extensions.find(kKhronosDoublePrecision) == std::string::npos) ? false : true; } template <> bool PrecisionSupported<double2>(const Device &device) { - auto extensions = device.Extensions(); + auto extensions = device.Capabilities(); return (extensions.find(kKhronosDoublePrecision) == std::string::npos) ? false : true; } diff --git a/test/correctness/tester.h b/test/correctness/tester.h index 93515138..06f4afbe 100644 --- a/test/correctness/tester.h +++ b/test/correctness/tester.h @@ -36,9 +36,6 @@ template <typename T, typename U> class Tester { public: - // Types of devices to consider - const cl_device_type kDeviceType = CL_DEVICE_TYPE_ALL; - // Maximum number of test results printed on a single line static constexpr auto kResultsPerLine = size_t{64}; @@ -92,7 +89,7 @@ class Tester { Platform platform_; Device device_; Context context_; - CommandQueue queue_; + Queue queue_; // Whether or not to run the full test-suite or just a smoke test bool full_test_; diff --git a/test/performance/client.cc b/test/performance/client.cc index 676e88e4..893bb55d 100644 --- a/test/performance/client.cc +++ b/test/performance/client.cc @@ -110,9 +110,9 @@ void Client<T,U>::PerformanceTest(Arguments<U> &args, const SetMetric set_sizes) // Initializes OpenCL and the libraries auto platform = Platform(args.platform_id); - auto device = Device(platform, kDeviceType, args.device_id); + auto device = Device(platform, args.device_id); auto context = Context(device); - auto queue = CommandQueue(context, device); + auto queue = Queue(context, device); if (args.compare_clblas) { clblasSetup(); } // Iterates over all "num_step" values jumping by "step" each time @@ -135,17 +135,17 @@ void Client<T,U>::PerformanceTest(Arguments<U> &args, const SetMetric set_sizes) PopulateVector(c_source); // Creates the matrices on the device - auto x_vec = Buffer(context, CL_MEM_READ_WRITE, args.x_size*sizeof(T)); - auto y_vec = Buffer(context, CL_MEM_READ_WRITE, args.y_size*sizeof(T)); - auto a_mat = Buffer(context, CL_MEM_READ_WRITE, args.a_size*sizeof(T)); - auto b_mat = Buffer(context, CL_MEM_READ_WRITE, args.b_size*sizeof(T)); - auto c_mat = Buffer(context, CL_MEM_READ_WRITE, args.c_size*sizeof(T)); - x_vec.WriteBuffer(queue, args.x_size*sizeof(T), x_source); - y_vec.WriteBuffer(queue, args.y_size*sizeof(T), y_source); - a_mat.WriteBuffer(queue, args.a_size*sizeof(T), a_source); - b_mat.WriteBuffer(queue, args.b_size*sizeof(T), b_source); - c_mat.WriteBuffer(queue, args.c_size*sizeof(T), c_source); - auto buffers = Buffers{x_vec, y_vec, a_mat, b_mat, c_mat}; + auto x_vec = Buffer<T>(context, args.x_size); + auto y_vec = Buffer<T>(context, args.y_size); + auto a_mat = Buffer<T>(context, args.a_size); + auto b_mat = Buffer<T>(context, args.b_size); + auto c_mat = Buffer<T>(context, args.c_size); + x_vec.Write(queue, args.x_size, x_source); + y_vec.Write(queue, args.y_size, y_source); + a_mat.Write(queue, args.a_size, a_source); + b_mat.Write(queue, args.b_size, b_source); + c_mat.Write(queue, args.c_size, c_source); + auto buffers = Buffers<T>{x_vec, y_vec, a_mat, b_mat, c_mat}; // Runs the routines and collects the timings auto ms_clblast = TimedExecution(args.num_runs, args, buffers, queue, run_routine_, "CLBlast"); @@ -176,7 +176,7 @@ void Client<T,U>::PerformanceTest(Arguments<U> &args, const SetMetric set_sizes) // value found in the vector of timing results. The return value is in milliseconds. template <typename T, typename U> double Client<T,U>::TimedExecution(const size_t num_runs, const Arguments<U> &args, - const Buffers &buffers, CommandQueue &queue, + const Buffers<T> &buffers, Queue &queue, Routine run_blas, const std::string &library_name) { auto timings = std::vector<double>(num_runs); for (auto &timing: timings) { diff --git a/test/performance/client.h b/test/performance/client.h index c9095967..9f6852d0 100644 --- a/test/performance/client.h +++ b/test/performance/client.h @@ -38,11 +38,8 @@ template <typename T, typename U> class Client { public: - // Types of devices to consider - const cl_device_type kDeviceType = CL_DEVICE_TYPE_ALL; - // Shorthand for the routine-specific functions passed to the tester - using Routine = std::function<StatusCode(const Arguments<U>&, const Buffers&, CommandQueue&)>; + using Routine = std::function<StatusCode(const Arguments<U>&, const Buffers<T>&, Queue&)>; using SetMetric = std::function<void(Arguments<U>&)>; using GetMetric = std::function<size_t(const Arguments<U>&)>; @@ -63,8 +60,8 @@ class Client { private: // Runs a function a given number of times and returns the execution time of the shortest instance - double TimedExecution(const size_t num_runs, const Arguments<U> &args, const Buffers &buffers, - CommandQueue &queue, Routine run_blas, const std::string &library_name); + double TimedExecution(const size_t num_runs, const Arguments<U> &args, const Buffers<T> &buffers, + Queue &queue, Routine run_blas, const std::string &library_name); // Prints the header of a performance-data table void PrintTableHeader(const bool silent, const std::vector<std::string> &args); diff --git a/test/routines/level1/xaxpy.h b/test/routines/level1/xaxpy.h index 6ce5d7e2..866fb620 100644 --- a/test/routines/level1/xaxpy.h +++ b/test/routines/level1/xaxpy.h @@ -57,8 +57,7 @@ class TestXaxpy { static size_t DefaultLDC(const Arguments<T> &) { return 1; } // N/A for this routine // Describes how to run the CLBlast routine - static StatusCode RunRoutine(const Arguments<T> &args, const Buffers &buffers, - CommandQueue &queue) { + static StatusCode RunRoutine(const Arguments<T> &args, const Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = Axpy(args.n, args.alpha, @@ -70,8 +69,7 @@ class TestXaxpy { } // Describes how to run the clBLAS routine (for correctness/performance comparison) - static StatusCode RunReference(const Arguments<T> &args, const Buffers &buffers, - CommandQueue &queue) { + static StatusCode RunReference(const Arguments<T> &args, const Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = clblasXaxpy(args.n, args.alpha, @@ -83,10 +81,9 @@ class TestXaxpy { } // Describes how to download the results of the computation (more importantly: which buffer) - static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers &buffers, - CommandQueue &queue) { + static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { std::vector<T> result(args.y_size, static_cast<T>(0)); - buffers.y_vec.ReadBuffer(queue, args.y_size*sizeof(T), result); + buffers.y_vec.Read(queue, args.y_size, result); return result; } diff --git a/test/routines/level2/xgemv.h b/test/routines/level2/xgemv.h index 73f7d76e..056dec30 100644 --- a/test/routines/level2/xgemv.h +++ b/test/routines/level2/xgemv.h @@ -68,8 +68,7 @@ class TestXgemv { static size_t DefaultLDC(const Arguments<T> &) { return 1; } // N/A for this routine // Describes how to run the CLBlast routine - static StatusCode RunRoutine(const Arguments<T> &args, const Buffers &buffers, - CommandQueue &queue) { + static StatusCode RunRoutine(const Arguments<T> &args, const Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = Gemv(args.layout, args.a_transpose, @@ -83,8 +82,7 @@ class TestXgemv { } // Describes how to run the clBLAS routine (for correctness/performance comparison) - static StatusCode RunReference(const Arguments<T> &args, const Buffers &buffers, - CommandQueue &queue) { + static StatusCode RunReference(const Arguments<T> &args, const Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = clblasXgemv(static_cast<clblasOrder>(args.layout), @@ -99,10 +97,9 @@ class TestXgemv { } // Describes how to download the results of the computation (more importantly: which buffer) - static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers &buffers, - CommandQueue &queue) { + static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { std::vector<T> result(args.y_size, static_cast<T>(0)); - buffers.y_vec.ReadBuffer(queue, args.y_size*sizeof(T), result); + buffers.y_vec.Read(queue, args.y_size, result); return result; } diff --git a/test/routines/level3/xgemm.h b/test/routines/level3/xgemm.h index 86a304d1..f06719d6 100644 --- a/test/routines/level3/xgemm.h +++ b/test/routines/level3/xgemm.h @@ -70,8 +70,7 @@ class TestXgemm { static size_t DefaultLDC(const Arguments<T> &args) { return args.n; } // Describes how to run the CLBlast routine - static StatusCode RunRoutine(const Arguments<T> &args, const Buffers &buffers, - CommandQueue &queue) { + static StatusCode RunRoutine(const Arguments<T> &args, const Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = Gemm(args.layout, args.a_transpose, args.b_transpose, @@ -85,8 +84,7 @@ class TestXgemm { } // Describes how to run the clBLAS routine (for correctness/performance comparison) - static StatusCode RunReference(const Arguments<T> &args, const Buffers &buffers, - CommandQueue &queue) { + static StatusCode RunReference(const Arguments<T> &args, const Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = clblasXgemm(static_cast<clblasOrder>(args.layout), @@ -102,10 +100,9 @@ class TestXgemm { } // Describes how to download the results of the computation (more importantly: which buffer) - static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers &buffers, - CommandQueue &queue) { + static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { std::vector<T> result(args.c_size, static_cast<T>(0)); - buffers.c_mat.ReadBuffer(queue, args.c_size*sizeof(T), result); + buffers.c_mat.Read(queue, args.c_size, result); return result; } diff --git a/test/routines/level3/xhemm.h b/test/routines/level3/xhemm.h index 75878b06..0c3b9c31 100644 --- a/test/routines/level3/xhemm.h +++ b/test/routines/level3/xhemm.h @@ -70,8 +70,7 @@ class TestXhemm { static size_t DefaultLDC(const Arguments<T> &args) { return args.n; } // Describes how to run the CLBlast routine - static StatusCode RunRoutine(const Arguments<T> &args, const Buffers &buffers, - CommandQueue &queue) { + static StatusCode RunRoutine(const Arguments<T> &args, const Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = Hemm(args.layout, args.side, args.triangle, @@ -85,8 +84,7 @@ class TestXhemm { } // Describes how to run the clBLAS routine (for correctness/performance comparison) - static StatusCode RunReference(const Arguments<T> &args, const Buffers &buffers, - CommandQueue &queue) { + static StatusCode RunReference(const Arguments<T> &args, const Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = clblasXhemm(static_cast<clblasOrder>(args.layout), @@ -102,10 +100,9 @@ class TestXhemm { } // Describes how to download the results of the computation (more importantly: which buffer) - static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers &buffers, - CommandQueue &queue) { + static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { std::vector<T> result(args.c_size, static_cast<T>(0)); - buffers.c_mat.ReadBuffer(queue, args.c_size*sizeof(T), result); + buffers.c_mat.Read(queue, args.c_size, result); return result; } diff --git a/test/routines/level3/xher2k.h b/test/routines/level3/xher2k.h index f13e8a62..b20ec973 100644 --- a/test/routines/level3/xher2k.h +++ b/test/routines/level3/xher2k.h @@ -68,8 +68,7 @@ class TestXher2k { static size_t DefaultLDC(const Arguments<U> &args) { return args.n; } // Describes how to run the CLBlast routine - static StatusCode RunRoutine(const Arguments<U> &args, const Buffers &buffers, - CommandQueue &queue) { + static StatusCode RunRoutine(const Arguments<U> &args, const Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto alpha2 = T{args.alpha, args.alpha}; @@ -84,8 +83,7 @@ class TestXher2k { } // Describes how to run the clBLAS routine (for correctness/performance comparison) - static StatusCode RunReference(const Arguments<U> &args, const Buffers &buffers, - CommandQueue &queue) { + static StatusCode RunReference(const Arguments<U> &args, const Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto alpha2 = T{args.alpha, args.alpha}; @@ -102,10 +100,9 @@ class TestXher2k { } // Describes how to download the results of the computation (more importantly: which buffer) - static std::vector<T> DownloadResult(const Arguments<U> &args, Buffers &buffers, - CommandQueue &queue) { + static std::vector<T> DownloadResult(const Arguments<U> &args, Buffers<T> &buffers, Queue &queue) { std::vector<T> result(args.c_size, static_cast<T>(0)); - buffers.c_mat.ReadBuffer(queue, args.c_size*sizeof(T), result); + buffers.c_mat.Read(queue, args.c_size, result); return result; } diff --git a/test/routines/level3/xherk.h b/test/routines/level3/xherk.h index 780b9b52..20c2b4b8 100644 --- a/test/routines/level3/xherk.h +++ b/test/routines/level3/xherk.h @@ -61,8 +61,7 @@ class TestXherk { static size_t DefaultLDC(const Arguments<U> &args) { return args.n; } // Describes how to run the CLBlast routine - static StatusCode RunRoutine(const Arguments<U> &args, const Buffers &buffers, - CommandQueue &queue) { + static StatusCode RunRoutine(const Arguments<U> &args, const Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = Herk(args.layout, args.triangle, args.a_transpose, @@ -75,8 +74,7 @@ class TestXherk { } // Describes how to run the clBLAS routine (for correctness/performance comparison) - static StatusCode RunReference(const Arguments<U> &args, const Buffers &buffers, - CommandQueue &queue) { + static StatusCode RunReference(const Arguments<U> &args, const Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = clblasXherk(static_cast<clblasOrder>(args.layout), @@ -91,10 +89,9 @@ class TestXherk { } // Describes how to download the results of the computation (more importantly: which buffer) - static std::vector<T> DownloadResult(const Arguments<U> &args, Buffers &buffers, - CommandQueue &queue) { + static std::vector<T> DownloadResult(const Arguments<U> &args, Buffers<T> &buffers, Queue &queue) { std::vector<T> result(args.c_size, static_cast<T>(0)); - buffers.c_mat.ReadBuffer(queue, args.c_size*sizeof(T), result); + buffers.c_mat.Read(queue, args.c_size, result); return result; } diff --git a/test/routines/level3/xsymm.h b/test/routines/level3/xsymm.h index 10476349..5b5ad351 100644 --- a/test/routines/level3/xsymm.h +++ b/test/routines/level3/xsymm.h @@ -70,8 +70,7 @@ class TestXsymm { static size_t DefaultLDC(const Arguments<T> &args) { return args.n; } // Describes how to run the CLBlast routine - static StatusCode RunRoutine(const Arguments<T> &args, const Buffers &buffers, - CommandQueue &queue) { + static StatusCode RunRoutine(const Arguments<T> &args, const Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = Symm(args.layout, args.side, args.triangle, @@ -85,8 +84,7 @@ class TestXsymm { } // Describes how to run the clBLAS routine (for correctness/performance comparison) - static StatusCode RunReference(const Arguments<T> &args, const Buffers &buffers, - CommandQueue &queue) { + static StatusCode RunReference(const Arguments<T> &args, const Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = clblasXsymm(static_cast<clblasOrder>(args.layout), @@ -102,10 +100,9 @@ class TestXsymm { } // Describes how to download the results of the computation (more importantly: which buffer) - static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers &buffers, - CommandQueue &queue) { + static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { std::vector<T> result(args.c_size, static_cast<T>(0)); - buffers.c_mat.ReadBuffer(queue, args.c_size*sizeof(T), result); + buffers.c_mat.Read(queue, args.c_size, result); return result; } diff --git a/test/routines/level3/xsyr2k.h b/test/routines/level3/xsyr2k.h index f3b1b542..21fcee2a 100644 --- a/test/routines/level3/xsyr2k.h +++ b/test/routines/level3/xsyr2k.h @@ -68,8 +68,7 @@ class TestXsyr2k { static size_t DefaultLDC(const Arguments<T> &args) { return args.n; } // Describes how to run the CLBlast routine - static StatusCode RunRoutine(const Arguments<T> &args, const Buffers &buffers, - CommandQueue &queue) { + static StatusCode RunRoutine(const Arguments<T> &args, const Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = Syr2k(args.layout, args.triangle, args.a_transpose, @@ -83,8 +82,7 @@ class TestXsyr2k { } // Describes how to run the clBLAS routine (for correctness/performance comparison) - static StatusCode RunReference(const Arguments<T> &args, const Buffers &buffers, - CommandQueue &queue) { + static StatusCode RunReference(const Arguments<T> &args, const Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = clblasXsyr2k(static_cast<clblasOrder>(args.layout), @@ -100,10 +98,9 @@ class TestXsyr2k { } // Describes how to download the results of the computation (more importantly: which buffer) - static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers &buffers, - CommandQueue &queue) { + static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { std::vector<T> result(args.c_size, static_cast<T>(0)); - buffers.c_mat.ReadBuffer(queue, args.c_size*sizeof(T), result); + buffers.c_mat.Read(queue, args.c_size, result); return result; } diff --git a/test/routines/level3/xsyrk.h b/test/routines/level3/xsyrk.h index 2ec9fb65..c92693c2 100644 --- a/test/routines/level3/xsyrk.h +++ b/test/routines/level3/xsyrk.h @@ -61,8 +61,7 @@ class TestXsyrk { static size_t DefaultLDC(const Arguments<T> &args) { return args.n; } // Describes how to run the CLBlast routine - static StatusCode RunRoutine(const Arguments<T> &args, const Buffers &buffers, - CommandQueue &queue) { + static StatusCode RunRoutine(const Arguments<T> &args, const Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = Syrk(args.layout, args.triangle, args.a_transpose, @@ -75,8 +74,7 @@ class TestXsyrk { } // Describes how to run the clBLAS routine (for correctness/performance comparison) - static StatusCode RunReference(const Arguments<T> &args, const Buffers &buffers, - CommandQueue &queue) { + static StatusCode RunReference(const Arguments<T> &args, const Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = clblasXsyrk(static_cast<clblasOrder>(args.layout), @@ -91,10 +89,9 @@ class TestXsyrk { } // Describes how to download the results of the computation (more importantly: which buffer) - static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers &buffers, - CommandQueue &queue) { + static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { std::vector<T> result(args.c_size, static_cast<T>(0)); - buffers.c_mat.ReadBuffer(queue, args.c_size*sizeof(T), result); + buffers.c_mat.Read(queue, args.c_size, result); return result; } diff --git a/test/routines/level3/xtrmm.h b/test/routines/level3/xtrmm.h index 7b7e7af1..d5a52903 100644 --- a/test/routines/level3/xtrmm.h +++ b/test/routines/level3/xtrmm.h @@ -61,8 +61,7 @@ class TestXtrmm { static size_t DefaultLDC(const Arguments<T> &) { return 1; } // N/A for this routine // Describes how to run the CLBlast routine - static StatusCode RunRoutine(const Arguments<T> &args, const Buffers &buffers, - CommandQueue &queue) { + static StatusCode RunRoutine(const Arguments<T> &args, const Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = Trmm(args.layout, args.side, args.triangle, args.a_transpose, args.diagonal, @@ -75,8 +74,7 @@ class TestXtrmm { } // Describes how to run the clBLAS routine (for correctness/performance comparison) - static StatusCode RunReference(const Arguments<T> &args, const Buffers &buffers, - CommandQueue &queue) { + static StatusCode RunReference(const Arguments<T> &args, const Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = clblasXtrmm(static_cast<clblasOrder>(args.layout), @@ -93,10 +91,9 @@ class TestXtrmm { } // Describes how to download the results of the computation (more importantly: which buffer) - static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers &buffers, - CommandQueue &queue) { + static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { std::vector<T> result(args.b_size, static_cast<T>(0)); - buffers.b_mat.ReadBuffer(queue, args.b_size*sizeof(T), result); + buffers.b_mat.Read(queue, args.b_size, result); return result; } |