From e6da575fff9d55de2b83def06243ca8dc9038f40 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sun, 15 Oct 2017 19:35:21 +0200 Subject: Modified test interfaces such that they support either OpenCL or CUDA --- test/routines/level2/xgbmv.hpp | 28 +++++++++++++++++++--------- test/routines/level2/xgemv.hpp | 28 +++++++++++++++++++--------- test/routines/level2/xger.hpp | 28 +++++++++++++++++++--------- test/routines/level2/xgerc.hpp | 28 +++++++++++++++++++--------- test/routines/level2/xgeru.hpp | 28 +++++++++++++++++++--------- test/routines/level2/xhbmv.hpp | 28 +++++++++++++++++++--------- test/routines/level2/xhemv.hpp | 28 +++++++++++++++++++--------- test/routines/level2/xher.hpp | 25 +++++++++++++++++-------- test/routines/level2/xher2.hpp | 28 +++++++++++++++++++--------- test/routines/level2/xhpmv.hpp | 28 +++++++++++++++++++--------- test/routines/level2/xhpr.hpp | 25 +++++++++++++++++-------- test/routines/level2/xhpr2.hpp | 28 +++++++++++++++++++--------- test/routines/level2/xsbmv.hpp | 28 +++++++++++++++++++--------- test/routines/level2/xspmv.hpp | 28 +++++++++++++++++++--------- test/routines/level2/xspr.hpp | 25 +++++++++++++++++-------- test/routines/level2/xspr2.hpp | 28 +++++++++++++++++++--------- test/routines/level2/xsymv.hpp | 28 +++++++++++++++++++--------- test/routines/level2/xsyr.hpp | 25 +++++++++++++++++-------- test/routines/level2/xsyr2.hpp | 28 +++++++++++++++++++--------- test/routines/level2/xtbmv.hpp | 26 ++++++++++++++++++-------- test/routines/level2/xtpmv.hpp | 25 +++++++++++++++++-------- test/routines/level2/xtrmv.hpp | 25 +++++++++++++++++-------- test/routines/level2/xtrsv.hpp | 25 +++++++++++++++++-------- 23 files changed, 422 insertions(+), 199 deletions(-) (limited to 'test/routines/level2') diff --git a/test/routines/level2/xgbmv.hpp b/test/routines/level2/xgbmv.hpp index 23138c77..7c198e5d 100644 --- a/test/routines/level2/xgbmv.hpp +++ b/test/routines/level2/xgbmv.hpp @@ -81,15 +81,25 @@ class TestXgbmv { // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = Gbmv(args.layout, args.a_transpose, - args.m, args.n, args.kl, args.ku, args.alpha, - buffers.a_mat(), args.a_offset, args.a_ld, - buffers.x_vec(), args.x_offset, args.x_inc, args.beta, - buffers.y_vec(), args.y_offset, args.y_inc, - &queue_plain, &event); - if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #ifdef OPENCL_API + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = Gbmv(args.layout, args.a_transpose, + args.m, args.n, args.kl, args.ku, args.alpha, + buffers.a_mat(), args.a_offset, args.a_ld, + buffers.x_vec(), args.x_offset, args.x_inc, args.beta, + buffers.y_vec(), args.y_offset, args.y_inc, + &queue_plain, &event); + if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #elif CUDA_API + auto status = Gbmv(args.layout, args.a_transpose, + args.m, args.n, args.kl, args.ku, args.alpha, + buffers.a_mat(), args.a_offset, args.a_ld, + buffers.x_vec(), args.x_offset, args.x_inc, args.beta, + buffers.y_vec(), args.y_offset, args.y_inc, + queue.GetContext()(), queue.GetDevice()()); + cuStreamSynchronize(queue()); + #endif return status; } diff --git a/test/routines/level2/xgemv.hpp b/test/routines/level2/xgemv.hpp index 0ee53b80..780e2976 100644 --- a/test/routines/level2/xgemv.hpp +++ b/test/routines/level2/xgemv.hpp @@ -81,15 +81,25 @@ class TestXgemv { // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = Gemv(args.layout, args.a_transpose, - args.m, args.n, args.alpha, - buffers.a_mat(), args.a_offset, args.a_ld, - buffers.x_vec(), args.x_offset, args.x_inc, args.beta, - buffers.y_vec(), args.y_offset, args.y_inc, - &queue_plain, &event); - if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #ifdef OPENCL_API + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = Gemv(args.layout, args.a_transpose, + args.m, args.n, args.alpha, + buffers.a_mat(), args.a_offset, args.a_ld, + buffers.x_vec(), args.x_offset, args.x_inc, args.beta, + buffers.y_vec(), args.y_offset, args.y_inc, + &queue_plain, &event); + if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #elif CUDA_API + auto status = Gemv(args.layout, args.a_transpose, + args.m, args.n, args.alpha, + buffers.a_mat(), args.a_offset, args.a_ld, + buffers.x_vec(), args.x_offset, args.x_inc, args.beta, + buffers.y_vec(), args.y_offset, args.y_inc, + queue.GetContext()(), queue.GetDevice()()); + cuStreamSynchronize(queue()); + #endif return status; } diff --git a/test/routines/level2/xger.hpp b/test/routines/level2/xger.hpp index 92a1a2ae..9c5e2e40 100644 --- a/test/routines/level2/xger.hpp +++ b/test/routines/level2/xger.hpp @@ -77,15 +77,25 @@ class TestXger { // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = Ger(args.layout, - args.m, args.n, args.alpha, - buffers.x_vec(), args.x_offset, args.x_inc, - buffers.y_vec(), args.y_offset, args.y_inc, - buffers.a_mat(), args.a_offset, args.a_ld, - &queue_plain, &event); - if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #ifdef OPENCL_API + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = Ger(args.layout, + args.m, args.n, args.alpha, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.y_vec(), args.y_offset, args.y_inc, + buffers.a_mat(), args.a_offset, args.a_ld, + &queue_plain, &event); + if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #elif CUDA_API + auto status = Ger(args.layout, + args.m, args.n, args.alpha, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.y_vec(), args.y_offset, args.y_inc, + buffers.a_mat(), args.a_offset, args.a_ld, + queue.GetContext()(), queue.GetDevice()()); + cuStreamSynchronize(queue()); + #endif return status; } diff --git a/test/routines/level2/xgerc.hpp b/test/routines/level2/xgerc.hpp index 5d899398..5f58b65d 100644 --- a/test/routines/level2/xgerc.hpp +++ b/test/routines/level2/xgerc.hpp @@ -77,15 +77,25 @@ class TestXgerc { // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = Gerc(args.layout, - args.m, args.n, args.alpha, - buffers.x_vec(), args.x_offset, args.x_inc, - buffers.y_vec(), args.y_offset, args.y_inc, - buffers.a_mat(), args.a_offset, args.a_ld, - &queue_plain, &event); - if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #ifdef OPENCL_API + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = Gerc(args.layout, + args.m, args.n, args.alpha, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.y_vec(), args.y_offset, args.y_inc, + buffers.a_mat(), args.a_offset, args.a_ld, + &queue_plain, &event); + if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #elif CUDA_API + auto status = Gerc(args.layout, + args.m, args.n, args.alpha, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.y_vec(), args.y_offset, args.y_inc, + buffers.a_mat(), args.a_offset, args.a_ld, + queue.GetContext()(), queue.GetDevice()()); + cuStreamSynchronize(queue()); + #endif return status; } diff --git a/test/routines/level2/xgeru.hpp b/test/routines/level2/xgeru.hpp index 96dab22e..fea3932c 100644 --- a/test/routines/level2/xgeru.hpp +++ b/test/routines/level2/xgeru.hpp @@ -77,15 +77,25 @@ class TestXgeru { // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = Geru(args.layout, - args.m, args.n, args.alpha, - buffers.x_vec(), args.x_offset, args.x_inc, - buffers.y_vec(), args.y_offset, args.y_inc, - buffers.a_mat(), args.a_offset, args.a_ld, - &queue_plain, &event); - if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #ifdef OPENCL_API + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = Geru(args.layout, + args.m, args.n, args.alpha, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.y_vec(), args.y_offset, args.y_inc, + buffers.a_mat(), args.a_offset, args.a_ld, + &queue_plain, &event); + if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #elif CUDA_API + auto status = Geru(args.layout, + args.m, args.n, args.alpha, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.y_vec(), args.y_offset, args.y_inc, + buffers.a_mat(), args.a_offset, args.a_ld, + queue.GetContext()(), queue.GetDevice()()); + cuStreamSynchronize(queue()); + #endif return status; } diff --git a/test/routines/level2/xhbmv.hpp b/test/routines/level2/xhbmv.hpp index b6844744..0ccd69b7 100644 --- a/test/routines/level2/xhbmv.hpp +++ b/test/routines/level2/xhbmv.hpp @@ -75,15 +75,25 @@ class TestXhbmv { // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = Hbmv(args.layout, args.triangle, - args.n, args.kl, args.alpha, - buffers.a_mat(), args.a_offset, args.a_ld, - buffers.x_vec(), args.x_offset, args.x_inc, args.beta, - buffers.y_vec(), args.y_offset, args.y_inc, - &queue_plain, &event); - if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #ifdef OPENCL_API + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = Hbmv(args.layout, args.triangle, + args.n, args.kl, args.alpha, + buffers.a_mat(), args.a_offset, args.a_ld, + buffers.x_vec(), args.x_offset, args.x_inc, args.beta, + buffers.y_vec(), args.y_offset, args.y_inc, + &queue_plain, &event); + if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #elif CUDA_API + auto status = Hbmv(args.layout, args.triangle, + args.n, args.kl, args.alpha, + buffers.a_mat(), args.a_offset, args.a_ld, + buffers.x_vec(), args.x_offset, args.x_inc, args.beta, + buffers.y_vec(), args.y_offset, args.y_inc, + queue.GetContext()(), queue.GetDevice()()); + cuStreamSynchronize(queue()); + #endif return status; } diff --git a/test/routines/level2/xhemv.hpp b/test/routines/level2/xhemv.hpp index e1f23592..053bc2dc 100644 --- a/test/routines/level2/xhemv.hpp +++ b/test/routines/level2/xhemv.hpp @@ -75,15 +75,25 @@ class TestXhemv { // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = Hemv(args.layout, args.triangle, - args.n, args.alpha, - buffers.a_mat(), args.a_offset, args.a_ld, - buffers.x_vec(), args.x_offset, args.x_inc, args.beta, - buffers.y_vec(), args.y_offset, args.y_inc, - &queue_plain, &event); - if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #ifdef OPENCL_API + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = Hemv(args.layout, args.triangle, + args.n, args.alpha, + buffers.a_mat(), args.a_offset, args.a_ld, + buffers.x_vec(), args.x_offset, args.x_inc, args.beta, + buffers.y_vec(), args.y_offset, args.y_inc, + &queue_plain, &event); + if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #elif CUDA_API + auto status = Hemv(args.layout, args.triangle, + args.n, args.alpha, + buffers.a_mat(), args.a_offset, args.a_ld, + buffers.x_vec(), args.x_offset, args.x_inc, args.beta, + buffers.y_vec(), args.y_offset, args.y_inc, + queue.GetContext()(), queue.GetDevice()()); + cuStreamSynchronize(queue()); + #endif return status; } diff --git a/test/routines/level2/xher.hpp b/test/routines/level2/xher.hpp index 1ac1247b..745df43f 100644 --- a/test/routines/level2/xher.hpp +++ b/test/routines/level2/xher.hpp @@ -71,14 +71,23 @@ class TestXher { // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = Her(args.layout, args.triangle, - args.n, args.alpha, - buffers.x_vec(), args.x_offset, args.x_inc, - buffers.a_mat(), args.a_offset, args.a_ld, - &queue_plain, &event); - if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #ifdef OPENCL_API + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = Her(args.layout, args.triangle, + args.n, args.alpha, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.a_mat(), args.a_offset, args.a_ld, + &queue_plain, &event); + if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #elif CUDA_API + auto status = Her(args.layout, args.triangle, + args.n, args.alpha, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.a_mat(), args.a_offset, args.a_ld, + queue.GetContext()(), queue.GetDevice()()); + cuStreamSynchronize(queue()); + #endif return status; } diff --git a/test/routines/level2/xher2.hpp b/test/routines/level2/xher2.hpp index 18ccc1ac..794e9a1e 100644 --- a/test/routines/level2/xher2.hpp +++ b/test/routines/level2/xher2.hpp @@ -75,15 +75,25 @@ class TestXher2 { // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = Her2(args.layout, args.triangle, - args.n, args.alpha, - buffers.x_vec(), args.x_offset, args.x_inc, - buffers.y_vec(), args.y_offset, args.y_inc, - buffers.a_mat(), args.a_offset, args.a_ld, - &queue_plain, &event); - if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #ifdef OPENCL_API + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = Her2(args.layout, args.triangle, + args.n, args.alpha, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.y_vec(), args.y_offset, args.y_inc, + buffers.a_mat(), args.a_offset, args.a_ld, + &queue_plain, &event); + if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #elif CUDA_API + auto status = Her2(args.layout, args.triangle, + args.n, args.alpha, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.y_vec(), args.y_offset, args.y_inc, + buffers.a_mat(), args.a_offset, args.a_ld, + queue.GetContext()(), queue.GetDevice()()); + cuStreamSynchronize(queue()); + #endif return status; } diff --git a/test/routines/level2/xhpmv.hpp b/test/routines/level2/xhpmv.hpp index ad91fe15..157272d3 100644 --- a/test/routines/level2/xhpmv.hpp +++ b/test/routines/level2/xhpmv.hpp @@ -75,15 +75,25 @@ class TestXhpmv { // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = Hpmv(args.layout, args.triangle, - args.n, args.alpha, - buffers.ap_mat(), args.ap_offset, - buffers.x_vec(), args.x_offset, args.x_inc, args.beta, - buffers.y_vec(), args.y_offset, args.y_inc, - &queue_plain, &event); - if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #ifdef OPENCL_API + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = Hpmv(args.layout, args.triangle, + args.n, args.alpha, + buffers.ap_mat(), args.ap_offset, + buffers.x_vec(), args.x_offset, args.x_inc, args.beta, + buffers.y_vec(), args.y_offset, args.y_inc, + &queue_plain, &event); + if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #elif CUDA_API + auto status = Hpmv(args.layout, args.triangle, + args.n, args.alpha, + buffers.ap_mat(), args.ap_offset, + buffers.x_vec(), args.x_offset, args.x_inc, args.beta, + buffers.y_vec(), args.y_offset, args.y_inc, + queue.GetContext()(), queue.GetDevice()()); + cuStreamSynchronize(queue()); + #endif return status; } diff --git a/test/routines/level2/xhpr.hpp b/test/routines/level2/xhpr.hpp index f9d580cd..a3bc60d1 100644 --- a/test/routines/level2/xhpr.hpp +++ b/test/routines/level2/xhpr.hpp @@ -71,14 +71,23 @@ class TestXhpr { // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = Hpr(args.layout, args.triangle, - args.n, args.alpha, - buffers.x_vec(), args.x_offset, args.x_inc, - buffers.ap_mat(), args.ap_offset, - &queue_plain, &event); - if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #ifdef OPENCL_API + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = Hpr(args.layout, args.triangle, + args.n, args.alpha, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.ap_mat(), args.ap_offset, + &queue_plain, &event); + if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #elif CUDA_API + auto status = Hpr(args.layout, args.triangle, + args.n, args.alpha, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.ap_mat(), args.ap_offset, + queue.GetContext()(), queue.GetDevice()()); + cuStreamSynchronize(queue()); + #endif return status; } diff --git a/test/routines/level2/xhpr2.hpp b/test/routines/level2/xhpr2.hpp index f946ba5c..1aa6cc54 100644 --- a/test/routines/level2/xhpr2.hpp +++ b/test/routines/level2/xhpr2.hpp @@ -75,15 +75,25 @@ class TestXhpr2 { // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = Hpr2(args.layout, args.triangle, - args.n, args.alpha, - buffers.x_vec(), args.x_offset, args.x_inc, - buffers.y_vec(), args.y_offset, args.y_inc, - buffers.ap_mat(), args.ap_offset, - &queue_plain, &event); - if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #ifdef OPENCL_API + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = Hpr2(args.layout, args.triangle, + args.n, args.alpha, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.y_vec(), args.y_offset, args.y_inc, + buffers.ap_mat(), args.ap_offset, + &queue_plain, &event); + if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #elif CUDA_API + auto status = Hpr2(args.layout, args.triangle, + args.n, args.alpha, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.y_vec(), args.y_offset, args.y_inc, + buffers.ap_mat(), args.ap_offset, + queue.GetContext()(), queue.GetDevice()()); + cuStreamSynchronize(queue()); + #endif return status; } diff --git a/test/routines/level2/xsbmv.hpp b/test/routines/level2/xsbmv.hpp index 6481d19b..51d6441e 100644 --- a/test/routines/level2/xsbmv.hpp +++ b/test/routines/level2/xsbmv.hpp @@ -75,15 +75,25 @@ class TestXsbmv { // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = Sbmv(args.layout, args.triangle, - args.n, args.kl, args.alpha, - buffers.a_mat(), args.a_offset, args.a_ld, - buffers.x_vec(), args.x_offset, args.x_inc, args.beta, - buffers.y_vec(), args.y_offset, args.y_inc, - &queue_plain, &event); - if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #ifdef OPENCL_API + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = Sbmv(args.layout, args.triangle, + args.n, args.kl, args.alpha, + buffers.a_mat(), args.a_offset, args.a_ld, + buffers.x_vec(), args.x_offset, args.x_inc, args.beta, + buffers.y_vec(), args.y_offset, args.y_inc, + &queue_plain, &event); + if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #elif CUDA_API + auto status = Sbmv(args.layout, args.triangle, + args.n, args.kl, args.alpha, + buffers.a_mat(), args.a_offset, args.a_ld, + buffers.x_vec(), args.x_offset, args.x_inc, args.beta, + buffers.y_vec(), args.y_offset, args.y_inc, + queue.GetContext()(), queue.GetDevice()()); + cuStreamSynchronize(queue()); + #endif return status; } diff --git a/test/routines/level2/xspmv.hpp b/test/routines/level2/xspmv.hpp index 9815dbee..f3089836 100644 --- a/test/routines/level2/xspmv.hpp +++ b/test/routines/level2/xspmv.hpp @@ -75,15 +75,25 @@ class TestXspmv { // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = Spmv(args.layout, args.triangle, - args.n, args.alpha, - buffers.ap_mat(), args.ap_offset, - buffers.x_vec(), args.x_offset, args.x_inc, args.beta, - buffers.y_vec(), args.y_offset, args.y_inc, - &queue_plain, &event); - if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #ifdef OPENCL_API + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = Spmv(args.layout, args.triangle, + args.n, args.alpha, + buffers.ap_mat(), args.ap_offset, + buffers.x_vec(), args.x_offset, args.x_inc, args.beta, + buffers.y_vec(), args.y_offset, args.y_inc, + &queue_plain, &event); + if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #elif CUDA_API + auto status = Spmv(args.layout, args.triangle, + args.n, args.alpha, + buffers.ap_mat(), args.ap_offset, + buffers.x_vec(), args.x_offset, args.x_inc, args.beta, + buffers.y_vec(), args.y_offset, args.y_inc, + queue.GetContext()(), queue.GetDevice()()); + cuStreamSynchronize(queue()); + #endif return status; } diff --git a/test/routines/level2/xspr.hpp b/test/routines/level2/xspr.hpp index 01a50c38..d76de610 100644 --- a/test/routines/level2/xspr.hpp +++ b/test/routines/level2/xspr.hpp @@ -71,14 +71,23 @@ class TestXspr { // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = Spr(args.layout, args.triangle, - args.n, args.alpha, - buffers.x_vec(), args.x_offset, args.x_inc, - buffers.ap_mat(), args.ap_offset, - &queue_plain, &event); - if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #ifdef OPENCL_API + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = Spr(args.layout, args.triangle, + args.n, args.alpha, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.ap_mat(), args.ap_offset, + &queue_plain, &event); + if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #elif CUDA_API + auto status = Spr(args.layout, args.triangle, + args.n, args.alpha, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.ap_mat(), args.ap_offset, + queue.GetContext()(), queue.GetDevice()()); + cuStreamSynchronize(queue()); + #endif return status; } diff --git a/test/routines/level2/xspr2.hpp b/test/routines/level2/xspr2.hpp index 55f8a141..5ce82a52 100644 --- a/test/routines/level2/xspr2.hpp +++ b/test/routines/level2/xspr2.hpp @@ -75,15 +75,25 @@ class TestXspr2 { // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = Spr2(args.layout, args.triangle, - args.n, args.alpha, - buffers.x_vec(), args.x_offset, args.x_inc, - buffers.y_vec(), args.y_offset, args.y_inc, - buffers.ap_mat(), args.ap_offset, - &queue_plain, &event); - if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #ifdef OPENCL_API + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = Spr2(args.layout, args.triangle, + args.n, args.alpha, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.y_vec(), args.y_offset, args.y_inc, + buffers.ap_mat(), args.ap_offset, + &queue_plain, &event); + if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #elif CUDA_API + auto status = Spr2(args.layout, args.triangle, + args.n, args.alpha, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.y_vec(), args.y_offset, args.y_inc, + buffers.ap_mat(), args.ap_offset, + queue.GetContext()(), queue.GetDevice()()); + cuStreamSynchronize(queue()); + #endif return status; } diff --git a/test/routines/level2/xsymv.hpp b/test/routines/level2/xsymv.hpp index aec0dfb0..2a70756d 100644 --- a/test/routines/level2/xsymv.hpp +++ b/test/routines/level2/xsymv.hpp @@ -75,15 +75,25 @@ class TestXsymv { // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = Symv(args.layout, args.triangle, - args.n, args.alpha, - buffers.a_mat(), args.a_offset, args.a_ld, - buffers.x_vec(), args.x_offset, args.x_inc, args.beta, - buffers.y_vec(), args.y_offset, args.y_inc, - &queue_plain, &event); - if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #ifdef OPENCL_API + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = Symv(args.layout, args.triangle, + args.n, args.alpha, + buffers.a_mat(), args.a_offset, args.a_ld, + buffers.x_vec(), args.x_offset, args.x_inc, args.beta, + buffers.y_vec(), args.y_offset, args.y_inc, + &queue_plain, &event); + if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #elif CUDA_API + auto status = Symv(args.layout, args.triangle, + args.n, args.alpha, + buffers.a_mat(), args.a_offset, args.a_ld, + buffers.x_vec(), args.x_offset, args.x_inc, args.beta, + buffers.y_vec(), args.y_offset, args.y_inc, + queue.GetContext()(), queue.GetDevice()()); + cuStreamSynchronize(queue()); + #endif return status; } diff --git a/test/routines/level2/xsyr.hpp b/test/routines/level2/xsyr.hpp index 78b686d8..02aad990 100644 --- a/test/routines/level2/xsyr.hpp +++ b/test/routines/level2/xsyr.hpp @@ -71,14 +71,23 @@ class TestXsyr { // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = Syr(args.layout, args.triangle, - args.n, args.alpha, - buffers.x_vec(), args.x_offset, args.x_inc, - buffers.a_mat(), args.a_offset, args.a_ld, - &queue_plain, &event); - if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #ifdef OPENCL_API + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = Syr(args.layout, args.triangle, + args.n, args.alpha, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.a_mat(), args.a_offset, args.a_ld, + &queue_plain, &event); + if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #elif CUDA_API + auto status = Syr(args.layout, args.triangle, + args.n, args.alpha, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.a_mat(), args.a_offset, args.a_ld, + queue.GetContext()(), queue.GetDevice()()); + cuStreamSynchronize(queue()); + #endif return status; } diff --git a/test/routines/level2/xsyr2.hpp b/test/routines/level2/xsyr2.hpp index 38aa4f43..492a9d2d 100644 --- a/test/routines/level2/xsyr2.hpp +++ b/test/routines/level2/xsyr2.hpp @@ -75,15 +75,25 @@ class TestXsyr2 { // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = Syr2(args.layout, args.triangle, - args.n, args.alpha, - buffers.x_vec(), args.x_offset, args.x_inc, - buffers.y_vec(), args.y_offset, args.y_inc, - buffers.a_mat(), args.a_offset, args.a_ld, - &queue_plain, &event); - if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #ifdef OPENCL_API + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = Syr2(args.layout, args.triangle, + args.n, args.alpha, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.y_vec(), args.y_offset, args.y_inc, + buffers.a_mat(), args.a_offset, args.a_ld, + &queue_plain, &event); + if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #elif CUDA_API + auto status = Syr2(args.layout, args.triangle, + args.n, args.alpha, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.y_vec(), args.y_offset, args.y_inc, + buffers.a_mat(), args.a_offset, args.a_ld, + queue.GetContext()(), queue.GetDevice()()); + cuStreamSynchronize(queue()); + #endif return status; } diff --git a/test/routines/level2/xtbmv.hpp b/test/routines/level2/xtbmv.hpp index 8c7aa381..a80d9e26 100644 --- a/test/routines/level2/xtbmv.hpp +++ b/test/routines/level2/xtbmv.hpp @@ -70,14 +70,24 @@ class TestXtbmv { // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = Tbmv(args.layout, args.triangle, args.a_transpose, args.diagonal, - args.n, args.kl, - buffers.a_mat(), args.a_offset, args.a_ld, - buffers.x_vec(), args.x_offset, args.x_inc, - &queue_plain, &event); - if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #ifdef OPENCL_API + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = Tbmv(args.layout, args.triangle, args.a_transpose, args.diagonal, + args.n, args.kl, + buffers.a_mat(), args.a_offset, args.a_ld, + buffers.x_vec(), args.x_offset, args.x_inc, + &queue_plain, &event); + if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #elif CUDA_API + auto status = Tbmv(args.layout, args.triangle, args.a_transpose, args.diagonal, + args.n, args.kl, + buffers.a_mat(), args.a_offset, args.a_ld, + buffers.x_vec(), args.x_offset, args.x_inc, + &queue_plain, &event); + queue.GetContext()(), queue.GetDevice()()); + cuStreamSynchronize(queue()); + #endif return status; } diff --git a/test/routines/level2/xtpmv.hpp b/test/routines/level2/xtpmv.hpp index 3afab978..02f334a2 100644 --- a/test/routines/level2/xtpmv.hpp +++ b/test/routines/level2/xtpmv.hpp @@ -70,14 +70,23 @@ class TestXtpmv { // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = Tpmv(args.layout, args.triangle, args.a_transpose, args.diagonal, - args.n, - buffers.ap_mat(), args.ap_offset, - buffers.x_vec(), args.x_offset, args.x_inc, - &queue_plain, &event); - if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #ifdef OPENCL_API + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = Tpmv(args.layout, args.triangle, args.a_transpose, args.diagonal, + args.n, + buffers.ap_mat(), args.ap_offset, + buffers.x_vec(), args.x_offset, args.x_inc, + &queue_plain, &event); + if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #elif CUDA_API + auto status = Tpmv(args.layout, args.triangle, args.a_transpose, args.diagonal, + args.n, + buffers.ap_mat(), args.ap_offset, + buffers.x_vec(), args.x_offset, args.x_inc, + queue.GetContext()(), queue.GetDevice()()); + cuStreamSynchronize(queue()); + #endif return status; } diff --git a/test/routines/level2/xtrmv.hpp b/test/routines/level2/xtrmv.hpp index 2b71f151..4f2dd582 100644 --- a/test/routines/level2/xtrmv.hpp +++ b/test/routines/level2/xtrmv.hpp @@ -70,14 +70,23 @@ class TestXtrmv { // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = Trmv(args.layout, args.triangle, args.a_transpose, args.diagonal, - args.n, - buffers.a_mat(), args.a_offset, args.a_ld, - buffers.x_vec(), args.x_offset, args.x_inc, - &queue_plain, &event); - if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #ifdef OPENCL_API + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = Trmv(args.layout, args.triangle, args.a_transpose, args.diagonal, + args.n, + buffers.a_mat(), args.a_offset, args.a_ld, + buffers.x_vec(), args.x_offset, args.x_inc, + &queue_plain, &event); + if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #elif CUDA_API + auto status = Trmv(args.layout, args.triangle, args.a_transpose, args.diagonal, + args.n, + buffers.a_mat(), args.a_offset, args.a_ld, + buffers.x_vec(), args.x_offset, args.x_inc, + queue.GetContext()(), queue.GetDevice()()); + cuStreamSynchronize(queue()); + #endif return status; } diff --git a/test/routines/level2/xtrsv.hpp b/test/routines/level2/xtrsv.hpp index 85b50e85..aec8eace 100644 --- a/test/routines/level2/xtrsv.hpp +++ b/test/routines/level2/xtrsv.hpp @@ -85,14 +85,23 @@ class TestXtrsv { // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = Trsv(args.layout, args.triangle, args.a_transpose, args.diagonal, - args.n, - buffers.a_mat(), args.a_offset, args.a_ld, - buffers.x_vec(), args.x_offset, args.x_inc, - &queue_plain, &event); - if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #ifdef OPENCL_API + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = Trsv(args.layout, args.triangle, args.a_transpose, args.diagonal, + args.n, + buffers.a_mat(), args.a_offset, args.a_ld, + buffers.x_vec(), args.x_offset, args.x_inc, + &queue_plain, &event); + if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #elif CUDA_API + auto status = Trsv(args.layout, args.triangle, args.a_transpose, args.diagonal, + args.n, + buffers.a_mat(), args.a_offset, args.a_ld, + buffers.x_vec(), args.x_offset, args.x_inc, + queue.GetContext()(), queue.GetDevice()()); + cuStreamSynchronize(queue()); + #endif return status; } -- cgit v1.2.3 From 8431a165d02f55b4b4bcaa8920da65ad0558f2df Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sun, 15 Oct 2017 19:38:48 +0200 Subject: Fixed a small copy-paste typo --- test/routines/level2/xtbmv.hpp | 1 - 1 file changed, 1 deletion(-) (limited to 'test/routines/level2') diff --git a/test/routines/level2/xtbmv.hpp b/test/routines/level2/xtbmv.hpp index a80d9e26..587676ca 100644 --- a/test/routines/level2/xtbmv.hpp +++ b/test/routines/level2/xtbmv.hpp @@ -84,7 +84,6 @@ class TestXtbmv { args.n, args.kl, buffers.a_mat(), args.a_offset, args.a_ld, buffers.x_vec(), args.x_offset, args.x_inc, - &queue_plain, &event); queue.GetContext()(), queue.GetDevice()()); cuStreamSynchronize(queue()); #endif -- cgit v1.2.3