diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2017-03-08 20:10:20 +0100 |
---|---|---|
committer | Cedric Nugteren <web@cedricnugteren.nl> | 2017-03-08 20:10:20 +0100 |
commit | fa0a9c689fc21a2a24aeadf82ae0acdf6d8bf831 (patch) | |
tree | 404e85900a4c9038d407addb38798d06bb48868c /test/routines/level3 | |
parent | 6aba0bbae71702c4eebd88d0fe17739b509185c1 (diff) |
Make batched routines based on offsets instead of a vector of cl_mem objects - undoing many earlier changes
Diffstat (limited to 'test/routines/level3')
-rw-r--r-- | test/routines/level3/xgemm.hpp | 26 | ||||
-rw-r--r-- | test/routines/level3/xhemm.hpp | 26 | ||||
-rw-r--r-- | test/routines/level3/xher2k.hpp | 26 | ||||
-rw-r--r-- | test/routines/level3/xherk.hpp | 20 | ||||
-rw-r--r-- | test/routines/level3/xsymm.hpp | 26 | ||||
-rw-r--r-- | test/routines/level3/xsyr2k.hpp | 26 | ||||
-rw-r--r-- | test/routines/level3/xsyrk.hpp | 20 | ||||
-rw-r--r-- | test/routines/level3/xtrmm.hpp | 20 | ||||
-rw-r--r-- | test/routines/level3/xtrsm.hpp | 20 |
9 files changed, 105 insertions, 105 deletions
diff --git a/test/routines/level3/xgemm.hpp b/test/routines/level3/xgemm.hpp index 5b220889..1b12fb1c 100644 --- a/test/routines/level3/xgemm.hpp +++ b/test/routines/level3/xgemm.hpp @@ -88,14 +88,14 @@ class TestXgemm { std::vector<T>&, std::vector<T>&) {} // N/A for this routine // Describes how to run the CLBlast routine - static StatusCode RunRoutine(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) { + static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = Gemm(args.layout, args.a_transpose, args.b_transpose, args.m, args.n, args.k, args.alpha, - buffers[0].a_mat(), args.a_offset, args.a_ld, - buffers[0].b_mat(), args.b_offset, args.b_ld, args.beta, - buffers[0].c_mat(), args.c_offset, args.c_ld, + buffers.a_mat(), args.a_offset, args.a_ld, + buffers.b_mat(), args.b_offset, args.b_ld, args.beta, + buffers.c_mat(), args.c_offset, args.c_ld, &queue_plain, &event); if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } return status; @@ -103,16 +103,16 @@ class TestXgemm { // Describes how to run the clBLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CLBLAS - static StatusCode RunReference1(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) { + static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = clblasXgemm(convertToCLBLAS(args.layout), convertToCLBLAS(args.a_transpose), convertToCLBLAS(args.b_transpose), args.m, args.n, args.k, args.alpha, - buffers[0].a_mat, args.a_offset, args.a_ld, - buffers[0].b_mat, args.b_offset, args.b_ld, args.beta, - buffers[0].c_mat, args.c_offset, args.c_ld, + buffers.a_mat, args.a_offset, args.a_ld, + buffers.b_mat, args.b_offset, args.b_ld, args.beta, + buffers.c_mat, args.c_offset, args.c_ld, 1, &queue_plain, 0, nullptr, &event); clWaitForEvents(1, &event); return static_cast<StatusCode>(status); @@ -121,13 +121,13 @@ class TestXgemm { // Describes how to run the CPU BLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CBLAS - static StatusCode RunReference2(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) { + static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0)); std::vector<T> b_mat_cpu(args.b_size, static_cast<T>(0)); std::vector<T> c_mat_cpu(args.c_size, static_cast<T>(0)); - buffers[0].a_mat.Read(queue, args.a_size, a_mat_cpu); - buffers[0].b_mat.Read(queue, args.b_size, b_mat_cpu); - buffers[0].c_mat.Read(queue, args.c_size, c_mat_cpu); + buffers.a_mat.Read(queue, args.a_size, a_mat_cpu); + buffers.b_mat.Read(queue, args.b_size, b_mat_cpu); + buffers.c_mat.Read(queue, args.c_size, c_mat_cpu); cblasXgemm(convertToCBLAS(args.layout), convertToCBLAS(args.a_transpose), convertToCBLAS(args.b_transpose), @@ -135,7 +135,7 @@ class TestXgemm { a_mat_cpu, args.a_offset, args.a_ld, b_mat_cpu, args.b_offset, args.b_ld, args.beta, c_mat_cpu, args.c_offset, args.c_ld); - buffers[0].c_mat.Write(queue, args.c_size, c_mat_cpu); + buffers.c_mat.Write(queue, args.c_size, c_mat_cpu); return StatusCode::kSuccess; } #endif diff --git a/test/routines/level3/xhemm.hpp b/test/routines/level3/xhemm.hpp index e6e8724f..76550b15 100644 --- a/test/routines/level3/xhemm.hpp +++ b/test/routines/level3/xhemm.hpp @@ -88,14 +88,14 @@ class TestXhemm { std::vector<T>&, std::vector<T>&) {} // N/A for this routine // Describes how to run the CLBlast routine - static StatusCode RunRoutine(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) { + static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = Hemm(args.layout, args.side, args.triangle, args.m, args.n, args.alpha, - buffers[0].a_mat(), args.a_offset, args.a_ld, - buffers[0].b_mat(), args.b_offset, args.b_ld, args.beta, - buffers[0].c_mat(), args.c_offset, args.c_ld, + buffers.a_mat(), args.a_offset, args.a_ld, + buffers.b_mat(), args.b_offset, args.b_ld, args.beta, + buffers.c_mat(), args.c_offset, args.c_ld, &queue_plain, &event); if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } return status; @@ -103,16 +103,16 @@ class TestXhemm { // Describes how to run the clBLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CLBLAS - static StatusCode RunReference1(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) { + static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = clblasXhemm(convertToCLBLAS(args.layout), convertToCLBLAS(args.side), convertToCLBLAS(args.triangle), args.m, args.n, args.alpha, - buffers[0].a_mat, args.a_offset, args.a_ld, - buffers[0].b_mat, args.b_offset, args.b_ld, args.beta, - buffers[0].c_mat, args.c_offset, args.c_ld, + buffers.a_mat, args.a_offset, args.a_ld, + buffers.b_mat, args.b_offset, args.b_ld, args.beta, + buffers.c_mat, args.c_offset, args.c_ld, 1, &queue_plain, 0, nullptr, &event); clWaitForEvents(1, &event); return static_cast<StatusCode>(status); @@ -121,13 +121,13 @@ class TestXhemm { // Describes how to run the CPU BLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CBLAS - static StatusCode RunReference2(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) { + static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0)); std::vector<T> b_mat_cpu(args.b_size, static_cast<T>(0)); std::vector<T> c_mat_cpu(args.c_size, static_cast<T>(0)); - buffers[0].a_mat.Read(queue, args.a_size, a_mat_cpu); - buffers[0].b_mat.Read(queue, args.b_size, b_mat_cpu); - buffers[0].c_mat.Read(queue, args.c_size, c_mat_cpu); + buffers.a_mat.Read(queue, args.a_size, a_mat_cpu); + buffers.b_mat.Read(queue, args.b_size, b_mat_cpu); + buffers.c_mat.Read(queue, args.c_size, c_mat_cpu); cblasXhemm(convertToCBLAS(args.layout), convertToCBLAS(args.side), convertToCBLAS(args.triangle), @@ -135,7 +135,7 @@ class TestXhemm { a_mat_cpu, args.a_offset, args.a_ld, b_mat_cpu, args.b_offset, args.b_ld, args.beta, c_mat_cpu, args.c_offset, args.c_ld); - buffers[0].c_mat.Write(queue, args.c_size, c_mat_cpu); + buffers.c_mat.Write(queue, args.c_size, c_mat_cpu); return StatusCode::kSuccess; } #endif diff --git a/test/routines/level3/xher2k.hpp b/test/routines/level3/xher2k.hpp index 749eca11..5ca3aac6 100644 --- a/test/routines/level3/xher2k.hpp +++ b/test/routines/level3/xher2k.hpp @@ -86,15 +86,15 @@ class TestXher2k { std::vector<T>&, std::vector<T>&) {} // N/A for this routine // Describes how to run the CLBlast routine - static StatusCode RunRoutine(const Arguments<U> &args, std::vector<Buffers<T>> &buffers, Queue &queue) { + static StatusCode RunRoutine(const Arguments<U> &args, Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto alpha2 = T{args.alpha, args.alpha}; auto status = Her2k(args.layout, args.triangle, args.a_transpose, args.n, args.k, alpha2, - buffers[0].a_mat(), args.a_offset, args.a_ld, - buffers[0].b_mat(), args.b_offset, args.b_ld, args.beta, - buffers[0].c_mat(), args.c_offset, args.c_ld, + buffers.a_mat(), args.a_offset, args.a_ld, + buffers.b_mat(), args.b_offset, args.b_ld, args.beta, + buffers.c_mat(), args.c_offset, args.c_ld, &queue_plain, &event); if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } return status; @@ -102,7 +102,7 @@ class TestXher2k { // Describes how to run the clBLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CLBLAS - static StatusCode RunReference1(const Arguments<U> &args, std::vector<Buffers<T>> &buffers, Queue &queue) { + static StatusCode RunReference1(const Arguments<U> &args, Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto alpha2 = T{args.alpha, args.alpha}; @@ -110,9 +110,9 @@ class TestXher2k { convertToCLBLAS(args.triangle), convertToCLBLAS(args.a_transpose), args.n, args.k, alpha2, - buffers[0].a_mat, args.a_offset, args.a_ld, - buffers[0].b_mat, args.b_offset, args.b_ld, args.beta, - buffers[0].c_mat, args.c_offset, args.c_ld, + buffers.a_mat, args.a_offset, args.a_ld, + buffers.b_mat, args.b_offset, args.b_ld, args.beta, + buffers.c_mat, args.c_offset, args.c_ld, 1, &queue_plain, 0, nullptr, &event); clWaitForEvents(1, &event); return static_cast<StatusCode>(status); @@ -121,13 +121,13 @@ class TestXher2k { // Describes how to run the CPU BLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CBLAS - static StatusCode RunReference2(const Arguments<U> &args, std::vector<Buffers<T>> &buffers, Queue &queue) { + static StatusCode RunReference2(const Arguments<U> &args, Buffers<T> &buffers, Queue &queue) { std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0)); std::vector<T> b_mat_cpu(args.b_size, static_cast<T>(0)); std::vector<T> c_mat_cpu(args.c_size, static_cast<T>(0)); - buffers[0].a_mat.Read(queue, args.a_size, a_mat_cpu); - buffers[0].b_mat.Read(queue, args.b_size, b_mat_cpu); - buffers[0].c_mat.Read(queue, args.c_size, c_mat_cpu); + buffers.a_mat.Read(queue, args.a_size, a_mat_cpu); + buffers.b_mat.Read(queue, args.b_size, b_mat_cpu); + buffers.c_mat.Read(queue, args.c_size, c_mat_cpu); auto alpha2 = T{args.alpha, args.alpha}; cblasXher2k(convertToCBLAS(args.layout), convertToCBLAS(args.triangle), @@ -136,7 +136,7 @@ class TestXher2k { a_mat_cpu, args.a_offset, args.a_ld, b_mat_cpu, args.b_offset, args.b_ld, args.beta, c_mat_cpu, args.c_offset, args.c_ld); - buffers[0].c_mat.Write(queue, args.c_size, c_mat_cpu); + buffers.c_mat.Write(queue, args.c_size, c_mat_cpu); return StatusCode::kSuccess; } #endif diff --git a/test/routines/level3/xherk.hpp b/test/routines/level3/xherk.hpp index e9193847..e93d887a 100644 --- a/test/routines/level3/xherk.hpp +++ b/test/routines/level3/xherk.hpp @@ -79,13 +79,13 @@ class TestXherk { std::vector<T>&, std::vector<T>&) {} // N/A for this routine // Describes how to run the CLBlast routine - static StatusCode RunRoutine(const Arguments<U> &args, std::vector<Buffers<T>> &buffers, Queue &queue) { + static StatusCode RunRoutine(const Arguments<U> &args, Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = Herk(args.layout, args.triangle, args.a_transpose, args.n, args.k, args.alpha, - buffers[0].a_mat(), args.a_offset, args.a_ld, args.beta, - buffers[0].c_mat(), args.c_offset, args.c_ld, + buffers.a_mat(), args.a_offset, args.a_ld, args.beta, + buffers.c_mat(), args.c_offset, args.c_ld, &queue_plain, &event); if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } return status; @@ -93,15 +93,15 @@ class TestXherk { // Describes how to run the clBLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CLBLAS - static StatusCode RunReference1(const Arguments<U> &args, std::vector<Buffers<T>> &buffers, Queue &queue) { + static StatusCode RunReference1(const Arguments<U> &args, Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = clblasXherk(convertToCLBLAS(args.layout), convertToCLBLAS(args.triangle), convertToCLBLAS(args.a_transpose), args.n, args.k, args.alpha, - buffers[0].a_mat, args.a_offset, args.a_ld, args.beta, - buffers[0].c_mat, args.c_offset, args.c_ld, + buffers.a_mat, args.a_offset, args.a_ld, args.beta, + buffers.c_mat, args.c_offset, args.c_ld, 1, &queue_plain, 0, nullptr, &event); clWaitForEvents(1, &event); return static_cast<StatusCode>(status); @@ -110,18 +110,18 @@ class TestXherk { // Describes how to run the CPU BLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CBLAS - static StatusCode RunReference2(const Arguments<U> &args, std::vector<Buffers<T>> &buffers, Queue &queue) { + static StatusCode RunReference2(const Arguments<U> &args, Buffers<T> &buffers, Queue &queue) { std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0)); std::vector<T> c_mat_cpu(args.c_size, static_cast<T>(0)); - buffers[0].a_mat.Read(queue, args.a_size, a_mat_cpu); - buffers[0].c_mat.Read(queue, args.c_size, c_mat_cpu); + buffers.a_mat.Read(queue, args.a_size, a_mat_cpu); + buffers.c_mat.Read(queue, args.c_size, c_mat_cpu); cblasXherk(convertToCBLAS(args.layout), convertToCBLAS(args.triangle), convertToCBLAS(args.a_transpose), args.n, args.k, args.alpha, a_mat_cpu, args.a_offset, args.a_ld, args.beta, c_mat_cpu, args.c_offset, args.c_ld); - buffers[0].c_mat.Write(queue, args.c_size, c_mat_cpu); + buffers.c_mat.Write(queue, args.c_size, c_mat_cpu); return StatusCode::kSuccess; } #endif diff --git a/test/routines/level3/xsymm.hpp b/test/routines/level3/xsymm.hpp index bcd74fda..9d127e26 100644 --- a/test/routines/level3/xsymm.hpp +++ b/test/routines/level3/xsymm.hpp @@ -88,14 +88,14 @@ class TestXsymm { std::vector<T>&, std::vector<T>&) {} // N/A for this routine // Describes how to run the CLBlast routine - static StatusCode RunRoutine(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) { + static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = Symm(args.layout, args.side, args.triangle, args.m, args.n, args.alpha, - buffers[0].a_mat(), args.a_offset, args.a_ld, - buffers[0].b_mat(), args.b_offset, args.b_ld, args.beta, - buffers[0].c_mat(), args.c_offset, args.c_ld, + buffers.a_mat(), args.a_offset, args.a_ld, + buffers.b_mat(), args.b_offset, args.b_ld, args.beta, + buffers.c_mat(), args.c_offset, args.c_ld, &queue_plain, &event); if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } return status; @@ -103,16 +103,16 @@ class TestXsymm { // Describes how to run the clBLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CLBLAS - static StatusCode RunReference1(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) { + static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = clblasXsymm(convertToCLBLAS(args.layout), convertToCLBLAS(args.side), convertToCLBLAS(args.triangle), args.m, args.n, args.alpha, - buffers[0].a_mat, args.a_offset, args.a_ld, - buffers[0].b_mat, args.b_offset, args.b_ld, args.beta, - buffers[0].c_mat, args.c_offset, args.c_ld, + buffers.a_mat, args.a_offset, args.a_ld, + buffers.b_mat, args.b_offset, args.b_ld, args.beta, + buffers.c_mat, args.c_offset, args.c_ld, 1, &queue_plain, 0, nullptr, &event); clWaitForEvents(1, &event); return static_cast<StatusCode>(status); @@ -121,13 +121,13 @@ class TestXsymm { // Describes how to run the CPU BLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CBLAS - static StatusCode RunReference2(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) { + static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0)); std::vector<T> b_mat_cpu(args.b_size, static_cast<T>(0)); std::vector<T> c_mat_cpu(args.c_size, static_cast<T>(0)); - buffers[0].a_mat.Read(queue, args.a_size, a_mat_cpu); - buffers[0].b_mat.Read(queue, args.b_size, b_mat_cpu); - buffers[0].c_mat.Read(queue, args.c_size, c_mat_cpu); + buffers.a_mat.Read(queue, args.a_size, a_mat_cpu); + buffers.b_mat.Read(queue, args.b_size, b_mat_cpu); + buffers.c_mat.Read(queue, args.c_size, c_mat_cpu); cblasXsymm(convertToCBLAS(args.layout), convertToCBLAS(args.side), convertToCBLAS(args.triangle), @@ -135,7 +135,7 @@ class TestXsymm { a_mat_cpu, args.a_offset, args.a_ld, b_mat_cpu, args.b_offset, args.b_ld, args.beta, c_mat_cpu, args.c_offset, args.c_ld); - buffers[0].c_mat.Write(queue, args.c_size, c_mat_cpu); + buffers.c_mat.Write(queue, args.c_size, c_mat_cpu); return StatusCode::kSuccess; } #endif diff --git a/test/routines/level3/xsyr2k.hpp b/test/routines/level3/xsyr2k.hpp index c722e0cf..d1bdac56 100644 --- a/test/routines/level3/xsyr2k.hpp +++ b/test/routines/level3/xsyr2k.hpp @@ -86,14 +86,14 @@ class TestXsyr2k { std::vector<T>&, std::vector<T>&) {} // N/A for this routine // Describes how to run the CLBlast routine - static StatusCode RunRoutine(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) { + static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = Syr2k(args.layout, args.triangle, args.a_transpose, args.n, args.k, args.alpha, - buffers[0].a_mat(), args.a_offset, args.a_ld, - buffers[0].b_mat(), args.b_offset, args.b_ld, args.beta, - buffers[0].c_mat(), args.c_offset, args.c_ld, + buffers.a_mat(), args.a_offset, args.a_ld, + buffers.b_mat(), args.b_offset, args.b_ld, args.beta, + buffers.c_mat(), args.c_offset, args.c_ld, &queue_plain, &event); if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } return status; @@ -101,16 +101,16 @@ class TestXsyr2k { // Describes how to run the clBLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CLBLAS - static StatusCode RunReference1(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) { + static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = clblasXsyr2k(convertToCLBLAS(args.layout), convertToCLBLAS(args.triangle), convertToCLBLAS(args.a_transpose), args.n, args.k, args.alpha, - buffers[0].a_mat, args.a_offset, args.a_ld, - buffers[0].b_mat, args.b_offset, args.b_ld, args.beta, - buffers[0].c_mat, args.c_offset, args.c_ld, + buffers.a_mat, args.a_offset, args.a_ld, + buffers.b_mat, args.b_offset, args.b_ld, args.beta, + buffers.c_mat, args.c_offset, args.c_ld, 1, &queue_plain, 0, nullptr, &event); clWaitForEvents(1, &event); return static_cast<StatusCode>(status); @@ -119,13 +119,13 @@ class TestXsyr2k { // Describes how to run the CPU BLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CBLAS - static StatusCode RunReference2(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) { + static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0)); std::vector<T> b_mat_cpu(args.b_size, static_cast<T>(0)); std::vector<T> c_mat_cpu(args.c_size, static_cast<T>(0)); - buffers[0].a_mat.Read(queue, args.a_size, a_mat_cpu); - buffers[0].b_mat.Read(queue, args.b_size, b_mat_cpu); - buffers[0].c_mat.Read(queue, args.c_size, c_mat_cpu); + buffers.a_mat.Read(queue, args.a_size, a_mat_cpu); + buffers.b_mat.Read(queue, args.b_size, b_mat_cpu); + buffers.c_mat.Read(queue, args.c_size, c_mat_cpu); cblasXsyr2k(convertToCBLAS(args.layout), convertToCBLAS(args.triangle), convertToCBLAS(args.a_transpose), @@ -133,7 +133,7 @@ class TestXsyr2k { a_mat_cpu, args.a_offset, args.a_ld, b_mat_cpu, args.b_offset, args.b_ld, args.beta, c_mat_cpu, args.c_offset, args.c_ld); - buffers[0].c_mat.Write(queue, args.c_size, c_mat_cpu); + buffers.c_mat.Write(queue, args.c_size, c_mat_cpu); return StatusCode::kSuccess; } #endif diff --git a/test/routines/level3/xsyrk.hpp b/test/routines/level3/xsyrk.hpp index 7d5c2039..1330924e 100644 --- a/test/routines/level3/xsyrk.hpp +++ b/test/routines/level3/xsyrk.hpp @@ -79,13 +79,13 @@ class TestXsyrk { std::vector<T>&, std::vector<T>&) {} // N/A for this routine // Describes how to run the CLBlast routine - static StatusCode RunRoutine(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) { + static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = Syrk(args.layout, args.triangle, args.a_transpose, args.n, args.k, args.alpha, - buffers[0].a_mat(), args.a_offset, args.a_ld, args.beta, - buffers[0].c_mat(), args.c_offset, args.c_ld, + buffers.a_mat(), args.a_offset, args.a_ld, args.beta, + buffers.c_mat(), args.c_offset, args.c_ld, &queue_plain, &event); if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } return status; @@ -93,15 +93,15 @@ class TestXsyrk { // Describes how to run the clBLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CLBLAS - static StatusCode RunReference1(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) { + static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = clblasXsyrk(convertToCLBLAS(args.layout), convertToCLBLAS(args.triangle), convertToCLBLAS(args.a_transpose), args.n, args.k, args.alpha, - buffers[0].a_mat, args.a_offset, args.a_ld, args.beta, - buffers[0].c_mat, args.c_offset, args.c_ld, + buffers.a_mat, args.a_offset, args.a_ld, args.beta, + buffers.c_mat, args.c_offset, args.c_ld, 1, &queue_plain, 0, nullptr, &event); clWaitForEvents(1, &event); return static_cast<StatusCode>(status); @@ -110,18 +110,18 @@ class TestXsyrk { // Describes how to run the CPU BLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CBLAS - static StatusCode RunReference2(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) { + static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0)); std::vector<T> c_mat_cpu(args.c_size, static_cast<T>(0)); - buffers[0].a_mat.Read(queue, args.a_size, a_mat_cpu); - buffers[0].c_mat.Read(queue, args.c_size, c_mat_cpu); + buffers.a_mat.Read(queue, args.a_size, a_mat_cpu); + buffers.c_mat.Read(queue, args.c_size, c_mat_cpu); cblasXsyrk(convertToCBLAS(args.layout), convertToCBLAS(args.triangle), convertToCBLAS(args.a_transpose), args.n, args.k, args.alpha, a_mat_cpu, args.a_offset, args.a_ld, args.beta, c_mat_cpu, args.c_offset, args.c_ld); - buffers[0].c_mat.Write(queue, args.c_size, c_mat_cpu); + buffers.c_mat.Write(queue, args.c_size, c_mat_cpu); return StatusCode::kSuccess; } #endif diff --git a/test/routines/level3/xtrmm.hpp b/test/routines/level3/xtrmm.hpp index 50cca6f8..7c5bd842 100644 --- a/test/routines/level3/xtrmm.hpp +++ b/test/routines/level3/xtrmm.hpp @@ -79,13 +79,13 @@ class TestXtrmm { std::vector<T>&, std::vector<T>&) {} // N/A for this routine // Describes how to run the CLBlast routine - static StatusCode RunRoutine(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) { + static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = Trmm(args.layout, args.side, args.triangle, args.a_transpose, args.diagonal, args.m, args.n, args.alpha, - buffers[0].a_mat(), args.a_offset, args.a_ld, - buffers[0].b_mat(), args.b_offset, args.b_ld, + buffers.a_mat(), args.a_offset, args.a_ld, + buffers.b_mat(), args.b_offset, args.b_ld, &queue_plain, &event); if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } return status; @@ -93,7 +93,7 @@ class TestXtrmm { // Describes how to run the clBLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CLBLAS - static StatusCode RunReference1(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) { + static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = clblasXtrmm(convertToCLBLAS(args.layout), @@ -102,8 +102,8 @@ class TestXtrmm { convertToCLBLAS(args.a_transpose), convertToCLBLAS(args.diagonal), args.m, args.n, args.alpha, - buffers[0].a_mat, args.a_offset, args.a_ld, - buffers[0].b_mat, args.b_offset, args.b_ld, + buffers.a_mat, args.a_offset, args.a_ld, + buffers.b_mat, args.b_offset, args.b_ld, 1, &queue_plain, 0, nullptr, &event); clWaitForEvents(1, &event); return static_cast<StatusCode>(status); @@ -112,11 +112,11 @@ class TestXtrmm { // Describes how to run the CPU BLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CBLAS - static StatusCode RunReference2(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) { + static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0)); std::vector<T> b_mat_cpu(args.b_size, static_cast<T>(0)); - buffers[0].a_mat.Read(queue, args.a_size, a_mat_cpu); - buffers[0].b_mat.Read(queue, args.b_size, b_mat_cpu); + buffers.a_mat.Read(queue, args.a_size, a_mat_cpu); + buffers.b_mat.Read(queue, args.b_size, b_mat_cpu); cblasXtrmm(convertToCBLAS(args.layout), convertToCBLAS(args.side), convertToCBLAS(args.triangle), @@ -125,7 +125,7 @@ class TestXtrmm { args.m, args.n, args.alpha, a_mat_cpu, args.a_offset, args.a_ld, b_mat_cpu, args.b_offset, args.b_ld); - buffers[0].b_mat.Write(queue, args.b_size, b_mat_cpu); + buffers.b_mat.Write(queue, args.b_size, b_mat_cpu); return StatusCode::kSuccess; } #endif diff --git a/test/routines/level3/xtrsm.hpp b/test/routines/level3/xtrsm.hpp index 91f91d0b..a70ef03f 100644 --- a/test/routines/level3/xtrsm.hpp +++ b/test/routines/level3/xtrsm.hpp @@ -91,13 +91,13 @@ class TestXtrsm { } // Describes how to run the CLBlast routine - static StatusCode RunRoutine(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) { + static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = Trsm(args.layout, args.side, args.triangle, args.a_transpose, args.diagonal, args.m, args.n, args.alpha, - buffers[0].a_mat(), args.a_offset, args.a_ld, - buffers[0].b_mat(), args.b_offset, args.b_ld, + buffers.a_mat(), args.a_offset, args.a_ld, + buffers.b_mat(), args.b_offset, args.b_ld, &queue_plain, &event); if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } return status; @@ -105,7 +105,7 @@ class TestXtrsm { // Describes how to run the clBLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CLBLAS - static StatusCode RunReference1(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) { + static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = clblasXtrsm(convertToCLBLAS(args.layout), @@ -114,8 +114,8 @@ class TestXtrsm { convertToCLBLAS(args.a_transpose), convertToCLBLAS(args.diagonal), args.m, args.n, args.alpha, - buffers[0].a_mat, args.a_offset, args.a_ld, - buffers[0].b_mat, args.b_offset, args.b_ld, + buffers.a_mat, args.a_offset, args.a_ld, + buffers.b_mat, args.b_offset, args.b_ld, 1, &queue_plain, 0, nullptr, &event); clWaitForEvents(1, &event); return static_cast<StatusCode>(status); @@ -124,11 +124,11 @@ class TestXtrsm { // Describes how to run the CPU BLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CBLAS - static StatusCode RunReference2(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) { + static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0)); std::vector<T> b_mat_cpu(args.b_size, static_cast<T>(0)); - buffers[0].a_mat.Read(queue, args.a_size, a_mat_cpu); - buffers[0].b_mat.Read(queue, args.b_size, b_mat_cpu); + buffers.a_mat.Read(queue, args.a_size, a_mat_cpu); + buffers.b_mat.Read(queue, args.b_size, b_mat_cpu); cblasXtrsm(convertToCBLAS(args.layout), convertToCBLAS(args.side), convertToCBLAS(args.triangle), @@ -137,7 +137,7 @@ class TestXtrsm { args.m, args.n, args.alpha, a_mat_cpu, args.a_offset, args.a_ld, b_mat_cpu, args.b_offset, args.b_ld); - buffers[0].b_mat.Write(queue, args.b_size, b_mat_cpu); + buffers.b_mat.Write(queue, args.b_size, b_mat_cpu); return StatusCode::kSuccess; } #endif |