diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2017-10-15 19:35:21 +0200 |
---|---|---|
committer | Cedric Nugteren <web@cedricnugteren.nl> | 2017-10-15 19:35:21 +0200 |
commit | e6da575fff9d55de2b83def06243ca8dc9038f40 (patch) | |
tree | 79366158ad584728673585ead2c96547353b983d /test/routines/level3 | |
parent | 7663cba23487290d7bf62c268410c840e3ee7972 (diff) |
Modified test interfaces such that they support either OpenCL or CUDA
Diffstat (limited to 'test/routines/level3')
-rw-r--r-- | test/routines/level3/xgemm.hpp | 28 | ||||
-rw-r--r-- | test/routines/level3/xhemm.hpp | 28 | ||||
-rw-r--r-- | test/routines/level3/xher2k.hpp | 28 | ||||
-rw-r--r-- | test/routines/level3/xherk.hpp | 25 | ||||
-rw-r--r-- | test/routines/level3/xsymm.hpp | 28 | ||||
-rw-r--r-- | test/routines/level3/xsyr2k.hpp | 28 | ||||
-rw-r--r-- | test/routines/level3/xsyrk.hpp | 25 | ||||
-rw-r--r-- | test/routines/level3/xtrmm.hpp | 25 | ||||
-rw-r--r-- | test/routines/level3/xtrsm.hpp | 25 |
9 files changed, 163 insertions, 77 deletions
diff --git a/test/routines/level3/xgemm.hpp b/test/routines/level3/xgemm.hpp index 1c430c1c..8444c1c3 100644 --- a/test/routines/level3/xgemm.hpp +++ b/test/routines/level3/xgemm.hpp @@ -90,15 +90,25 @@ class TestXgemm { {{"XGEMM_MIN_INDIRECT_SIZE", switch_threshold}}); if (override_status != StatusCode::kSuccess) { return override_status; } } - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = Gemm(args.layout, args.a_transpose, args.b_transpose, - args.m, args.n, args.k, args.alpha, - buffers.a_mat(), args.a_offset, args.a_ld, - buffers.b_mat(), args.b_offset, args.b_ld, args.beta, - buffers.c_mat(), args.c_offset, args.c_ld, - &queue_plain, &event); - if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #ifdef OPENCL_API + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = Gemm(args.layout, args.a_transpose, args.b_transpose, + args.m, args.n, args.k, args.alpha, + buffers.a_mat(), args.a_offset, args.a_ld, + buffers.b_mat(), args.b_offset, args.b_ld, args.beta, + buffers.c_mat(), args.c_offset, args.c_ld, + &queue_plain, &event); + if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #elif CUDA_API + auto status = Gemm(args.layout, args.a_transpose, args.b_transpose, + args.m, args.n, args.k, args.alpha, + buffers.a_mat(), args.a_offset, args.a_ld, + buffers.b_mat(), args.b_offset, args.b_ld, args.beta, + buffers.c_mat(), args.c_offset, args.c_ld, + queue.GetContext()(), queue.GetDevice()()); + cuStreamSynchronize(queue()); + #endif return status; } diff --git a/test/routines/level3/xhemm.hpp b/test/routines/level3/xhemm.hpp index a89617b5..3b70d3f1 100644 --- a/test/routines/level3/xhemm.hpp +++ b/test/routines/level3/xhemm.hpp @@ -83,15 +83,25 @@ class TestXhemm { // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = Hemm(args.layout, args.side, args.triangle, - args.m, args.n, args.alpha, - buffers.a_mat(), args.a_offset, args.a_ld, - buffers.b_mat(), args.b_offset, args.b_ld, args.beta, - buffers.c_mat(), args.c_offset, args.c_ld, - &queue_plain, &event); - if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #ifdef OPENCL_API + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = Hemm(args.layout, args.side, args.triangle, + args.m, args.n, args.alpha, + buffers.a_mat(), args.a_offset, args.a_ld, + buffers.b_mat(), args.b_offset, args.b_ld, args.beta, + buffers.c_mat(), args.c_offset, args.c_ld, + &queue_plain, &event); + if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #elif CUDA_API + auto status = Hemm(args.layout, args.side, args.triangle, + args.m, args.n, args.alpha, + buffers.a_mat(), args.a_offset, args.a_ld, + buffers.b_mat(), args.b_offset, args.b_ld, args.beta, + buffers.c_mat(), args.c_offset, args.c_ld, + queue.GetContext()(), queue.GetDevice()()); + cuStreamSynchronize(queue()); + #endif return status; } diff --git a/test/routines/level3/xher2k.hpp b/test/routines/level3/xher2k.hpp index 55e6d894..6c4e12f1 100644 --- a/test/routines/level3/xher2k.hpp +++ b/test/routines/level3/xher2k.hpp @@ -81,16 +81,26 @@ class TestXher2k { // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments<U> &args, Buffers<T> &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; auto alpha2 = T{args.alpha, args.alpha}; - auto status = Her2k(args.layout, args.triangle, args.a_transpose, - args.n, args.k, alpha2, - buffers.a_mat(), args.a_offset, args.a_ld, - buffers.b_mat(), args.b_offset, args.b_ld, args.beta, - buffers.c_mat(), args.c_offset, args.c_ld, - &queue_plain, &event); - if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #ifdef OPENCL_API + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = Her2k(args.layout, args.triangle, args.a_transpose, + args.n, args.k, alpha2, + buffers.a_mat(), args.a_offset, args.a_ld, + buffers.b_mat(), args.b_offset, args.b_ld, args.beta, + buffers.c_mat(), args.c_offset, args.c_ld, + &queue_plain, &event); + if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #elif CUDA_API + auto status = Her2k(args.layout, args.triangle, args.a_transpose, + args.n, args.k, alpha2, + buffers.a_mat(), args.a_offset, args.a_ld, + buffers.b_mat(), args.b_offset, args.b_ld, args.beta, + buffers.c_mat(), args.c_offset, args.c_ld, + queue.GetContext()(), queue.GetDevice()()); + cuStreamSynchronize(queue()); + #endif return status; } diff --git a/test/routines/level3/xherk.hpp b/test/routines/level3/xherk.hpp index 3e1e7e02..c1bb7a0b 100644 --- a/test/routines/level3/xherk.hpp +++ b/test/routines/level3/xherk.hpp @@ -74,14 +74,23 @@ class TestXherk { // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments<U> &args, Buffers<T> &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = Herk(args.layout, args.triangle, args.a_transpose, - args.n, args.k, args.alpha, - buffers.a_mat(), args.a_offset, args.a_ld, args.beta, - buffers.c_mat(), args.c_offset, args.c_ld, - &queue_plain, &event); - if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #ifdef OPENCL_API + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = Herk(args.layout, args.triangle, args.a_transpose, + args.n, args.k, args.alpha, + buffers.a_mat(), args.a_offset, args.a_ld, args.beta, + buffers.c_mat(), args.c_offset, args.c_ld, + &queue_plain, &event); + if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #elif CUDA_API + auto status = Herk(args.layout, args.triangle, args.a_transpose, + args.n, args.k, args.alpha, + buffers.a_mat(), args.a_offset, args.a_ld, args.beta, + buffers.c_mat(), args.c_offset, args.c_ld, + queue.GetContext()(), queue.GetDevice()()); + cuStreamSynchronize(queue()); + #endif return status; } diff --git a/test/routines/level3/xsymm.hpp b/test/routines/level3/xsymm.hpp index 5d840d40..90cc1888 100644 --- a/test/routines/level3/xsymm.hpp +++ b/test/routines/level3/xsymm.hpp @@ -83,15 +83,25 @@ class TestXsymm { // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = Symm(args.layout, args.side, args.triangle, - args.m, args.n, args.alpha, - buffers.a_mat(), args.a_offset, args.a_ld, - buffers.b_mat(), args.b_offset, args.b_ld, args.beta, - buffers.c_mat(), args.c_offset, args.c_ld, - &queue_plain, &event); - if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #ifdef OPENCL_API + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = Symm(args.layout, args.side, args.triangle, + args.m, args.n, args.alpha, + buffers.a_mat(), args.a_offset, args.a_ld, + buffers.b_mat(), args.b_offset, args.b_ld, args.beta, + buffers.c_mat(), args.c_offset, args.c_ld, + &queue_plain, &event); + if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #elif CUDA_API + auto status = Symm(args.layout, args.side, args.triangle, + args.m, args.n, args.alpha, + buffers.a_mat(), args.a_offset, args.a_ld, + buffers.b_mat(), args.b_offset, args.b_ld, args.beta, + buffers.c_mat(), args.c_offset, args.c_ld, + queue.GetContext()(), queue.GetDevice()()); + cuStreamSynchronize(queue()); + #endif return status; } diff --git a/test/routines/level3/xsyr2k.hpp b/test/routines/level3/xsyr2k.hpp index 4a4a2f10..6b29aff7 100644 --- a/test/routines/level3/xsyr2k.hpp +++ b/test/routines/level3/xsyr2k.hpp @@ -81,15 +81,25 @@ class TestXsyr2k { // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = Syr2k(args.layout, args.triangle, args.a_transpose, - args.n, args.k, args.alpha, - buffers.a_mat(), args.a_offset, args.a_ld, - buffers.b_mat(), args.b_offset, args.b_ld, args.beta, - buffers.c_mat(), args.c_offset, args.c_ld, - &queue_plain, &event); - if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #ifdef OPENCL_API + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = Syr2k(args.layout, args.triangle, args.a_transpose, + args.n, args.k, args.alpha, + buffers.a_mat(), args.a_offset, args.a_ld, + buffers.b_mat(), args.b_offset, args.b_ld, args.beta, + buffers.c_mat(), args.c_offset, args.c_ld, + &queue_plain, &event); + if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #elif CUDA_API + auto status = Syr2k(args.layout, args.triangle, args.a_transpose, + args.n, args.k, args.alpha, + buffers.a_mat(), args.a_offset, args.a_ld, + buffers.b_mat(), args.b_offset, args.b_ld, args.beta, + buffers.c_mat(), args.c_offset, args.c_ld, + queue.GetContext()(), queue.GetDevice()()); + cuStreamSynchronize(queue()); + #endif return status; } diff --git a/test/routines/level3/xsyrk.hpp b/test/routines/level3/xsyrk.hpp index 90e46727..b7782176 100644 --- a/test/routines/level3/xsyrk.hpp +++ b/test/routines/level3/xsyrk.hpp @@ -74,14 +74,23 @@ class TestXsyrk { // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = Syrk(args.layout, args.triangle, args.a_transpose, - args.n, args.k, args.alpha, - buffers.a_mat(), args.a_offset, args.a_ld, args.beta, - buffers.c_mat(), args.c_offset, args.c_ld, - &queue_plain, &event); - if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #ifdef OPENCL_API + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = Syrk(args.layout, args.triangle, args.a_transpose, + args.n, args.k, args.alpha, + buffers.a_mat(), args.a_offset, args.a_ld, args.beta, + buffers.c_mat(), args.c_offset, args.c_ld, + &queue_plain, &event); + if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #elif CUDA_API + auto status = Syrk(args.layout, args.triangle, args.a_transpose, + args.n, args.k, args.alpha, + buffers.a_mat(), args.a_offset, args.a_ld, args.beta, + buffers.c_mat(), args.c_offset, args.c_ld, + queue.GetContext()(), queue.GetDevice()()); + cuStreamSynchronize(queue()); + #endif return status; } diff --git a/test/routines/level3/xtrmm.hpp b/test/routines/level3/xtrmm.hpp index acc00e01..62d0f573 100644 --- a/test/routines/level3/xtrmm.hpp +++ b/test/routines/level3/xtrmm.hpp @@ -74,14 +74,23 @@ class TestXtrmm { // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = Trmm(args.layout, args.side, args.triangle, args.a_transpose, args.diagonal, - args.m, args.n, args.alpha, - buffers.a_mat(), args.a_offset, args.a_ld, - buffers.b_mat(), args.b_offset, args.b_ld, - &queue_plain, &event); - if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #ifdef OPENCL_API + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = Trmm(args.layout, args.side, args.triangle, args.a_transpose, args.diagonal, + args.m, args.n, args.alpha, + buffers.a_mat(), args.a_offset, args.a_ld, + buffers.b_mat(), args.b_offset, args.b_ld, + &queue_plain, &event); + if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #elif CUDA_API + auto status = Trmm(args.layout, args.side, args.triangle, args.a_transpose, args.diagonal, + args.m, args.n, args.alpha, + buffers.a_mat(), args.a_offset, args.a_ld, + buffers.b_mat(), args.b_offset, args.b_ld, + queue.GetContext()(), queue.GetDevice()()); + cuStreamSynchronize(queue()); + #endif return status; } diff --git a/test/routines/level3/xtrsm.hpp b/test/routines/level3/xtrsm.hpp index d63c9d79..9ce1f09c 100644 --- a/test/routines/level3/xtrsm.hpp +++ b/test/routines/level3/xtrsm.hpp @@ -85,14 +85,23 @@ class TestXtrsm { // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = Trsm(args.layout, args.side, args.triangle, args.a_transpose, args.diagonal, - args.m, args.n, args.alpha, - buffers.a_mat(), args.a_offset, args.a_ld, - buffers.b_mat(), args.b_offset, args.b_ld, - &queue_plain, &event); - if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #ifdef OPENCL_API + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = Trsm(args.layout, args.side, args.triangle, args.a_transpose, args.diagonal, + args.m, args.n, args.alpha, + buffers.a_mat(), args.a_offset, args.a_ld, + buffers.b_mat(), args.b_offset, args.b_ld, + &queue_plain, &event); + if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #elif CUDA_API + auto status = Trsm(args.layout, args.side, args.triangle, args.a_transpose, args.diagonal, + args.m, args.n, args.alpha, + buffers.a_mat(), args.a_offset, args.a_ld, + buffers.b_mat(), args.b_offset, args.b_ld, + queue.GetContext()(), queue.GetDevice()()); + cuStreamSynchronize(queue()); + #endif return status; } |