summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Cedric Nugteren <web@cedricnugteren.nl> 2017-10-15 17:43:20 +0200
committer Cedric Nugteren <web@cedricnugteren.nl> 2017-10-15 17:43:20 +0200
commit 7663cba23487290d7bf62c268410c840e3ee7972 (patch)
tree 1da30f7785e7b6984ed241aecdaba4093510ab6a
parent 71049e8d3966ac58263355a41abb4eac5dec818f (diff)
Fixes for the CUDA API: first tests pass and the client runs
-rw-r--r-- src/kernels/opencl_to_cuda.h 4
-rw-r--r-- test/routines/level1/xaxpy.hpp 1
2 files changed, 3 insertions, 2 deletions
diff --git a/src/kernels/opencl_to_cuda.h b/src/kernels/opencl_to_cuda.h
index fac30dfc..7602b539 100644
--- a/src/kernels/opencl_to_cuda.h
+++ b/src/kernels/opencl_to_cuda.h
@@ -32,8 +32,8 @@ __device__ int get_group_id(const int x) {
return blockIdx.z;
}
__device__ int get_global_size(const int x) {
- if (x == 0) { return gridDim.x; }
- if (x == 1) { return gridDim.y; }
+ if (x == 0) { return gridDim.x * blockDim.x; }
+ if (x == 1) { return gridDim.y * blockDim.y; }
return gridDim.z;
}
__device__ int get_global_id(const int x) {
diff --git a/test/routines/level1/xaxpy.hpp b/test/routines/level1/xaxpy.hpp
index cdceb4c7..7491a9e8 100644
--- a/test/routines/level1/xaxpy.hpp
+++ b/test/routines/level1/xaxpy.hpp
@@ -83,6 +83,7 @@ class TestXaxpy {
buffers.x_vec(), args.x_offset, args.x_inc,
buffers.y_vec(), args.y_offset, args.y_inc,
queue.GetContext()(), queue.GetDevice()());
+ cuStreamSynchronize(queue());
#endif
return status;
}