From a3069a97c3e5c22635786870c8a9d02ca16d3d1d Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sun, 15 Oct 2017 13:56:19 +0200 Subject: Prepared test and client infrastructure for use with the CUDA API --- test/routines/level1/xaxpy.hpp | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) (limited to 'test/routines/level1/xaxpy.hpp') diff --git a/test/routines/level1/xaxpy.hpp b/test/routines/level1/xaxpy.hpp index 17cae6ad..cdceb4c7 100644 --- a/test/routines/level1/xaxpy.hpp +++ b/test/routines/level1/xaxpy.hpp @@ -70,13 +70,20 @@ class TestXaxpy { // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = Axpy(args.n, args.alpha, - buffers.x_vec(), args.x_offset, args.x_inc, - buffers.y_vec(), args.y_offset, args.y_inc, - &queue_plain, &event); - if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #ifdef OPENCL_API + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = Axpy(args.n, args.alpha, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.y_vec(), args.y_offset, args.y_inc, + &queue_plain, &event); + if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #elif CUDA_API + auto status = Axpy(args.n, args.alpha, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.y_vec(), args.y_offset, args.y_inc, + queue.GetContext()(), queue.GetDevice()()); + #endif return status; } -- cgit v1.2.3 From 7663cba23487290d7bf62c268410c840e3ee7972 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sun, 15 Oct 2017 17:43:20 +0200 Subject: Fixes for the CUDA API: first tests pass and the client runs --- src/kernels/opencl_to_cuda.h | 4 ++-- test/routines/level1/xaxpy.hpp | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'test/routines/level1/xaxpy.hpp') diff --git a/src/kernels/opencl_to_cuda.h b/src/kernels/opencl_to_cuda.h index fac30dfc..7602b539 100644 --- a/src/kernels/opencl_to_cuda.h +++ b/src/kernels/opencl_to_cuda.h @@ -32,8 +32,8 @@ __device__ int get_group_id(const int x) { return blockIdx.z; } __device__ int get_global_size(const int x) { - if (x == 0) { return gridDim.x; } - if (x == 1) { return gridDim.y; } + if (x == 0) { return gridDim.x * blockDim.x; } + if (x == 1) { return gridDim.y * blockDim.y; } return gridDim.z; } __device__ int get_global_id(const int x) { diff --git a/test/routines/level1/xaxpy.hpp b/test/routines/level1/xaxpy.hpp index cdceb4c7..7491a9e8 100644 --- a/test/routines/level1/xaxpy.hpp +++ b/test/routines/level1/xaxpy.hpp @@ -83,6 +83,7 @@ class TestXaxpy { buffers.x_vec(), args.x_offset, args.x_inc, buffers.y_vec(), args.y_offset, args.y_inc, queue.GetContext()(), queue.GetDevice()()); + cuStreamSynchronize(queue()); #endif return status; } -- cgit v1.2.3