summary | refs | log | tree | commit | diff
path: root/test/wrapper_clblas.h
diff options
context:
space:
mode:
author: cnugteren <web@cedricnugteren.nl> 2016-03-30 21:37:56 -0700
committer: cnugteren <web@cedricnugteren.nl> 2016-03-30 21:37:56 -0700
commit: 8c3c6db7d07adaacb702fdaabfdf18f74fbfea13 (patch)
tree: f6dcd3f9d4f987ec74f87b1939c4b3600a7d42d0 /test/wrapper_clblas.h
parent: 6578102ae996ce0aa52b45704f38c1cd5a10d3c0 (diff)
parent: 5409f349a17f60ba68133fd0cc9789fb2918f790 (diff)
Merge branch 'level1_routines' into development
Diffstat (limited to 'test/wrapper_clblas.h')
-rw-r--r-- test/wrapper_clblas.h 104
1 files changed, 86 insertions, 18 deletions
diff --git a/test/wrapper_clblas.h b/test/wrapper_clblas.h
index 259aa27c..fb6e83aa 100644
--- a/test/wrapper_clblas.h
+++ b/test/wrapper_clblas.h
@@ -378,7 +378,7 @@ clblasStatus clblasXdot<float>(const size_t n,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
auto context = queue.GetContext();
- auto scratch_buffer = Buffer<float>(context, n*x_inc + x_offset);
+ auto scratch_buffer = Buffer<float>(context, n);
return clblasSdot(n,
dot_buffer, dot_offset,
x_buffer, x_offset, static_cast<int>(x_inc),
@@ -395,7 +395,7 @@ clblasStatus clblasXdot<double>(const size_t n,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
auto context = queue.GetContext();
- auto scratch_buffer = Buffer<double>(context, n*x_inc + x_offset);
+ auto scratch_buffer = Buffer<double>(context, n);
return clblasDdot(n,
dot_buffer, dot_offset,
x_buffer, x_offset, static_cast<int>(x_inc),
@@ -421,7 +421,7 @@ clblasStatus clblasXdotu<float2>(const size_t n,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
auto context = queue.GetContext();
- auto scratch_buffer = Buffer<float2>(context, n*x_inc + x_offset);
+ auto scratch_buffer = Buffer<float2>(context, n);
return clblasCdotu(n,
dot_buffer, dot_offset,
x_buffer, x_offset, static_cast<int>(x_inc),
@@ -438,7 +438,7 @@ clblasStatus clblasXdotu<double2>(const size_t n,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
auto context = queue.GetContext();
- auto scratch_buffer = Buffer<double2>(context, n*x_inc + x_offset);
+ auto scratch_buffer = Buffer<double2>(context, n);
return clblasZdotu(n,
dot_buffer, dot_offset,
x_buffer, x_offset, static_cast<int>(x_inc),
@@ -464,7 +464,7 @@ clblasStatus clblasXdotc<float2>(const size_t n,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
auto context = queue.GetContext();
- auto scratch_buffer = Buffer<float2>(context, n*x_inc + x_offset);
+ auto scratch_buffer = Buffer<float2>(context, n);
return clblasCdotc(n,
dot_buffer, dot_offset,
x_buffer, x_offset, static_cast<int>(x_inc),
@@ -481,7 +481,7 @@ clblasStatus clblasXdotc<double2>(const size_t n,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
auto context = queue.GetContext();
- auto scratch_buffer = Buffer<double2>(context, n*x_inc + x_offset);
+ auto scratch_buffer = Buffer<double2>(context, n);
return clblasZdotc(n,
dot_buffer, dot_offset,
x_buffer, x_offset, static_cast<int>(x_inc),
@@ -490,6 +490,74 @@ clblasStatus clblasXdotc<double2>(const size_t n,
num_queues, queues, num_wait_events, wait_events, events);
}
+// Forwards the clBLAS calls for SNRM2/DNRM2/ScNRM2/DzNRM2. Primary template: declared only; the explicit specializations that follow provide the bodies, one per element type.
+template <typename T> // T selects which specialization (and therefore which clBLAS routine) is used
+clblasStatus clblasXnrm2(const size_t n,
+ cl_mem nrm2_buffer, const size_t nrm2_offset,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events);
+template <> // float specialization: forwards to clblasSnrm2 and returns its status unchanged
+clblasStatus clblasXnrm2<float>(const size_t n,
+ cl_mem nrm2_buffer, const size_t nrm2_offset,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ auto queue = Queue(queues[0]); // only the first queue is wrapped here; it is used solely to obtain the context
+ auto context = queue.GetContext();
+ auto scratch_buffer = Buffer<float>(context, 2*n); // temporary handed to clBLAS as scratch space; size argument is 2*n (presumably an element count — confirm against Buffer)
+ return clblasSnrm2(n,
+ nrm2_buffer, nrm2_offset,
+ x_buffer, x_offset, static_cast<int>(x_inc), // x_inc arrives as size_t and is cast to int for the clBLAS call
+ scratch_buffer(), // presumably yields the underlying cl_mem — confirm against Buffer
+ num_queues, queues, num_wait_events, wait_events, events);
+}
+template <> // double specialization: forwards to clblasDnrm2 and returns its status unchanged
+clblasStatus clblasXnrm2<double>(const size_t n,
+ cl_mem nrm2_buffer, const size_t nrm2_offset,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ auto queue = Queue(queues[0]); // only the first queue is wrapped here; it is used solely to obtain the context
+ auto context = queue.GetContext();
+ auto scratch_buffer = Buffer<double>(context, 2*n); // temporary handed to clBLAS as scratch space; size argument is 2*n (presumably an element count — confirm against Buffer)
+ return clblasDnrm2(n,
+ nrm2_buffer, nrm2_offset,
+ x_buffer, x_offset, static_cast<int>(x_inc), // x_inc arrives as size_t and is cast to int for the clBLAS call
+ scratch_buffer(), // presumably yields the underlying cl_mem — confirm against Buffer
+ num_queues, queues, num_wait_events, wait_events, events);
+}
+template <> // float2 specialization: forwards to clblasScnrm2 and returns its status unchanged
+clblasStatus clblasXnrm2<float2>(const size_t n,
+ cl_mem nrm2_buffer, const size_t nrm2_offset,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ auto queue = Queue(queues[0]); // only the first queue is wrapped here; it is used solely to obtain the context
+ auto context = queue.GetContext();
+ auto scratch_buffer = Buffer<float2>(context, 2*n); // temporary handed to clBLAS as scratch space; size argument is 2*n with float2 as the element type (presumably an element count — confirm against Buffer)
+ return clblasScnrm2(n,
+ nrm2_buffer, nrm2_offset,
+ x_buffer, x_offset, static_cast<int>(x_inc), // x_inc arrives as size_t and is cast to int for the clBLAS call
+ scratch_buffer(), // presumably yields the underlying cl_mem — confirm against Buffer
+ num_queues, queues, num_wait_events, wait_events, events);
+}
+template <> // double2 specialization: forwards to clblasDznrm2 and returns its status unchanged
+clblasStatus clblasXnrm2<double2>(const size_t n,
+ cl_mem nrm2_buffer, const size_t nrm2_offset,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ auto queue = Queue(queues[0]); // only the first queue is wrapped here; it is used solely to obtain the context
+ auto context = queue.GetContext();
+ auto scratch_buffer = Buffer<double2>(context, 2*n); // temporary handed to clBLAS as scratch space; size argument is 2*n with double2 as the element type (presumably an element count — confirm against Buffer)
+ return clblasDznrm2(n,
+ nrm2_buffer, nrm2_offset,
+ x_buffer, x_offset, static_cast<int>(x_inc), // x_inc arrives as size_t and is cast to int for the clBLAS call
+ scratch_buffer(), // presumably yields the underlying cl_mem — confirm against Buffer
+ num_queues, queues, num_wait_events, wait_events, events);
+}
+
// =================================================================================================
// BLAS level-2 (matrix-vector) routines
// =================================================================================================
@@ -887,7 +955,7 @@ clblasStatus clblasXtrmv<float>(const clblasOrder layout, const clblasUplo trian
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
auto context = queue.GetContext();
- auto scratch_buffer = Buffer<float>(context, n*x_inc + x_offset);
+ auto scratch_buffer = Buffer<float>(context, n);
return clblasStrmv(layout, triangle, a_transpose, diagonal,
n,
a_buffer, a_offset, a_ld,
@@ -904,7 +972,7 @@ clblasStatus clblasXtrmv<double>(const clblasOrder layout, const clblasUplo tria
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
auto context = queue.GetContext();
- auto scratch_buffer = Buffer<double>(context, n*x_inc + x_offset);
+ auto scratch_buffer = Buffer<double>(context, n);
return clblasDtrmv(layout, triangle, a_transpose, diagonal,
n,
a_buffer, a_offset, a_ld,
@@ -921,7 +989,7 @@ clblasStatus clblasXtrmv<float2>(const clblasOrder layout, const clblasUplo tria
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
auto context = queue.GetContext();
- auto scratch_buffer = Buffer<float2>(context, n*x_inc + x_offset);
+ auto scratch_buffer = Buffer<float2>(context, n);
return clblasCtrmv(layout, triangle, a_transpose, diagonal,
n,
a_buffer, a_offset, a_ld,
@@ -938,7 +1006,7 @@ clblasStatus clblasXtrmv<double2>(const clblasOrder layout, const clblasUplo tri
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
auto context = queue.GetContext();
- auto scratch_buffer = Buffer<double2>(context, n*x_inc + x_offset);
+ auto scratch_buffer = Buffer<double2>(context, n);
return clblasZtrmv(layout, triangle, a_transpose, diagonal,
n,
a_buffer, a_offset, a_ld,
@@ -964,7 +1032,7 @@ clblasStatus clblasXtbmv<float>(const clblasOrder layout, const clblasUplo trian
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
auto context = queue.GetContext();
- auto scratch_buffer = Buffer<float>(context, n*x_inc + x_offset);
+ auto scratch_buffer = Buffer<float>(context, n);
return clblasStbmv(layout, triangle, a_transpose, diagonal,
n, k,
a_buffer, a_offset, a_ld,
@@ -981,7 +1049,7 @@ clblasStatus clblasXtbmv<double>(const clblasOrder layout, const clblasUplo tria
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
auto context = queue.GetContext();
- auto scratch_buffer = Buffer<double>(context, n*x_inc + x_offset);
+ auto scratch_buffer = Buffer<double>(context, n);
return clblasDtbmv(layout, triangle, a_transpose, diagonal,
n, k,
a_buffer, a_offset, a_ld,
@@ -998,7 +1066,7 @@ clblasStatus clblasXtbmv<float2>(const clblasOrder layout, const clblasUplo tria
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
auto context = queue.GetContext();
- auto scratch_buffer = Buffer<float2>(context, n*x_inc + x_offset);
+ auto scratch_buffer = Buffer<float2>(context, n);
return clblasCtbmv(layout, triangle, a_transpose, diagonal,
n, k,
a_buffer, a_offset, a_ld,
@@ -1015,7 +1083,7 @@ clblasStatus clblasXtbmv<double2>(const clblasOrder layout, const clblasUplo tri
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
auto context = queue.GetContext();
- auto scratch_buffer = Buffer<double2>(context, n*x_inc + x_offset);
+ auto scratch_buffer = Buffer<double2>(context, n);
return clblasZtbmv(layout, triangle, a_transpose, diagonal,
n, k,
a_buffer, a_offset, a_ld,
@@ -1041,7 +1109,7 @@ clblasStatus clblasXtpmv<float>(const clblasOrder layout, const clblasUplo trian
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
auto context = queue.GetContext();
- auto scratch_buffer = Buffer<float>(context, n*x_inc + x_offset);
+ auto scratch_buffer = Buffer<float>(context, n);
return clblasStpmv(layout, triangle, a_transpose, diagonal,
n,
ap_buffer, ap_offset,
@@ -1058,7 +1126,7 @@ clblasStatus clblasXtpmv<double>(const clblasOrder layout, const clblasUplo tria
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
auto context = queue.GetContext();
- auto scratch_buffer = Buffer<double>(context, n*x_inc + x_offset);
+ auto scratch_buffer = Buffer<double>(context, n);
return clblasDtpmv(layout, triangle, a_transpose, diagonal,
n,
ap_buffer, ap_offset,
@@ -1075,7 +1143,7 @@ clblasStatus clblasXtpmv<float2>(const clblasOrder layout, const clblasUplo tria
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
auto context = queue.GetContext();
- auto scratch_buffer = Buffer<float2>(context, n*x_inc + x_offset);
+ auto scratch_buffer = Buffer<float2>(context, n);
return clblasCtpmv(layout, triangle, a_transpose, diagonal,
n,
ap_buffer, ap_offset,
@@ -1092,7 +1160,7 @@ clblasStatus clblasXtpmv<double2>(const clblasOrder layout, const clblasUplo tri
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
auto context = queue.GetContext();
- auto scratch_buffer = Buffer<double2>(context, n*x_inc + x_offset);
+ auto scratch_buffer = Buffer<double2>(context, n);
return clblasZtpmv(layout, triangle, a_transpose, diagonal,
n,
ap_buffer, ap_offset,