diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2017-10-15 13:56:19 +0200 |
---|---|---|
committer | Cedric Nugteren <web@cedricnugteren.nl> | 2017-10-15 13:56:19 +0200 |
commit | a3069a97c3e5c22635786870c8a9d02ca16d3d1d (patch) | |
tree | 862676b6695e82d275d6f98ed4ed350186c00a82 | |
parent | 7408da174c848ffeaa1fe2da52f26a057e65b0f1 (diff) |
Prepared test and client infrastructure for use with the CUDA API
-rw-r--r-- | CMakeLists.txt | 6 | ||||
-rw-r--r-- | test/correctness/testblas.cpp | 44 | ||||
-rw-r--r-- | test/correctness/tester.hpp | 2 | ||||
-rw-r--r-- | test/performance/client.hpp | 2 | ||||
-rw-r--r-- | test/routines/level1/xaxpy.hpp | 21 | ||||
-rw-r--r-- | test/test_utilities.cpp | 44 | ||||
-rw-r--r-- | test/test_utilities.hpp | 21 |
7 files changed, 78 insertions, 62 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index d4e47215..e2f43f8e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -165,8 +165,10 @@ endif() # Locates the reference BLAS libraries in case the tests need to be compiled. The "FindclBLAS.cmake", # "FindCBLAS.cmake" and "FindcuBLAS.cmake" are included. if(CLIENTS OR TESTS) - find_package(clBLAS) find_package(CBLAS) + if(OPENCL) + find_package(clBLAS) + endif() if(CUBLAS) find_package(cuBLAS) endif() @@ -195,6 +197,8 @@ set(LEVEL3_ROUTINES xgemm xsymm xhemm xsyrk xherk xsyr2k xher2k xtrmm xtrsm) set(LEVELX_ROUTINES xomatcopy xim2col xaxpybatched xgemmbatched) set(ROUTINES ${LEVEL1_ROUTINES} ${LEVEL2_ROUTINES} ${LEVEL3_ROUTINES} ${LEVELX_ROUTINES}) set(PRECISIONS 32 64 3232 6464 16) + +# Sample programs if(OPENCL) set(SAMPLE_PROGRAMS_CPP sgemm sgemm_batched) set(SAMPLE_PROGRAMS_C sasum dgemv sgemm haxpy cache) diff --git a/test/correctness/testblas.cpp b/test/correctness/testblas.cpp index 659131c5..aa4b4785 100644 --- a/test/correctness/testblas.cpp +++ b/test/correctness/testblas.cpp @@ -241,36 +241,22 @@ void TestBlas<T,U>::TestInvalid(std::vector<Arguments<U>> &test_vector, const st std::cout << std::flush; } - // Creates the OpenCL buffers. Note: we are not using the C++ version since we explicitly + // Creates the buffers. Note: we are not using the cxpp11.h C++ version since we explicitly // want to be able to create invalid buffers (no error checking here). 
- auto x1 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.x_size*sizeof(T), nullptr,nullptr); - auto y1 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.y_size*sizeof(T), nullptr,nullptr); - auto a1 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.a_size*sizeof(T), nullptr,nullptr); - auto b1 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.b_size*sizeof(T), nullptr,nullptr); - auto c1 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.c_size*sizeof(T), nullptr,nullptr); - auto ap1 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.ap_size*sizeof(T), nullptr,nullptr); - auto d1 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.scalar_size*sizeof(T), nullptr,nullptr); - auto x_vec1 = Buffer<T>(x1); - auto y_vec1 = Buffer<T>(y1); - auto a_mat1 = Buffer<T>(a1); - auto b_mat1 = Buffer<T>(b1); - auto c_mat1 = Buffer<T>(c1); - auto ap_mat1 = Buffer<T>(ap1); - auto scalar1 = Buffer<T>(d1); - auto x2 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.x_size*sizeof(T), nullptr,nullptr); - auto y2 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.y_size*sizeof(T), nullptr,nullptr); - auto a2 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.a_size*sizeof(T), nullptr,nullptr); - auto b2 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.b_size*sizeof(T), nullptr,nullptr); - auto c2 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.c_size*sizeof(T), nullptr,nullptr); - auto ap2 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.ap_size*sizeof(T), nullptr,nullptr); - auto d2 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.scalar_size*sizeof(T), nullptr,nullptr); - auto x_vec2 = Buffer<T>(x2); - auto y_vec2 = Buffer<T>(y2); - auto a_mat2 = Buffer<T>(a2); - auto b_mat2 = Buffer<T>(b2); - auto c_mat2 = Buffer<T>(c2); - auto ap_mat2 = Buffer<T>(ap2); - auto scalar2 = Buffer<T>(d2); + auto x_vec1 = CreateInvalidBuffer<T>(context_, args.x_size); + auto y_vec1 = CreateInvalidBuffer<T>(context_, args.y_size); + auto a_mat1 = 
CreateInvalidBuffer<T>(context_, args.a_size); + auto b_mat1 = CreateInvalidBuffer<T>(context_, args.b_size); + auto c_mat1 = CreateInvalidBuffer<T>(context_, args.c_size); + auto ap_mat1 = CreateInvalidBuffer<T>(context_, args.ap_size); + auto scalar1 = CreateInvalidBuffer<T>(context_, args.scalar_size); + auto x_vec2 = CreateInvalidBuffer<T>(context_, args.x_size); + auto y_vec2 = CreateInvalidBuffer<T>(context_, args.y_size); + auto a_mat2 = CreateInvalidBuffer<T>(context_, args.a_size); + auto b_mat2 = CreateInvalidBuffer<T>(context_, args.b_size); + auto c_mat2 = CreateInvalidBuffer<T>(context_, args.c_size); + auto ap_mat2 = CreateInvalidBuffer<T>(context_, args.ap_size); + auto scalar2 = CreateInvalidBuffer<T>(context_, args.scalar_size); auto buffers1 = Buffers<T>{x_vec1, y_vec1, a_mat1, b_mat1, c_mat1, ap_mat1, scalar1}; auto buffers2 = Buffers<T>{x_vec2, y_vec2, a_mat2, b_mat2, c_mat2, ap_mat2, scalar2}; diff --git a/test/correctness/tester.hpp b/test/correctness/tester.hpp index caf03787..640f870a 100644 --- a/test/correctness/tester.hpp +++ b/test/correctness/tester.hpp @@ -22,13 +22,13 @@ #include <vector> #include <memory> +#include "utilities/utilities.hpp" #include "test/test_utilities.hpp" // The libraries #ifdef CLBLAST_REF_CLBLAS #include <clBLAS.h> #endif -#include "clblast.h" namespace clblast { // ================================================================================================= diff --git a/test/performance/client.hpp b/test/performance/client.hpp index 2ba09cb9..0b6176c8 100644 --- a/test/performance/client.hpp +++ b/test/performance/client.hpp @@ -32,7 +32,7 @@ #include <clBLAS.h> #endif #include "test/wrapper_cuda.hpp" -#include "clblast.h" +#include "utilities/utilities.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level1/xaxpy.hpp b/test/routines/level1/xaxpy.hpp index 17cae6ad..cdceb4c7 100644 --- 
a/test/routines/level1/xaxpy.hpp +++ b/test/routines/level1/xaxpy.hpp @@ -70,13 +70,20 @@ class TestXaxpy { // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = Axpy(args.n, args.alpha, - buffers.x_vec(), args.x_offset, args.x_inc, - buffers.y_vec(), args.y_offset, args.y_inc, - &queue_plain, &event); - if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #ifdef OPENCL_API + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = Axpy(args.n, args.alpha, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.y_vec(), args.y_offset, args.y_inc, + &queue_plain, &event); + if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #elif CUDA_API + auto status = Axpy(args.n, args.alpha, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.y_vec(), args.y_offset, args.y_inc, + queue.GetContext()(), queue.GetDevice()()); + #endif return status; } diff --git a/test/test_utilities.cpp b/test/test_utilities.cpp index 579eb61c..84f8894f 100644 --- a/test/test_utilities.cpp +++ b/test/test_utilities.cpp @@ -88,27 +88,29 @@ void FloatToHalfBuffer(std::vector<half>& result, const std::vector<float>& sour } // As above, but now for OpenCL data-types instead of std::vectors -Buffer<float> HalfToFloatBuffer(const Buffer<half>& source, RawCommandQueue queue_raw) { - const auto size = source.GetSize() / sizeof(half); - auto queue = Queue(queue_raw); - auto context = queue.GetContext(); - auto source_cpu = std::vector<half>(size); - source.Read(queue, size, source_cpu); - auto result_cpu = HalfToFloatBuffer(source_cpu); - auto result = Buffer<float>(context, size); - result.Write(queue, size, result_cpu); - return result; -} -void FloatToHalfBuffer(Buffer<half>& result, const Buffer<float>& source, RawCommandQueue queue_raw) { - const auto 
size = source.GetSize() / sizeof(float); - auto queue = Queue(queue_raw); - auto context = queue.GetContext(); - auto source_cpu = std::vector<float>(size); - source.Read(queue, size, source_cpu); - auto result_cpu = std::vector<half>(size); - FloatToHalfBuffer(result_cpu, source_cpu); - result.Write(queue, size, result_cpu); -} +#ifdef OPENCL_API + Buffer<float> HalfToFloatBuffer(const Buffer<half>& source, RawCommandQueue queue_raw) { + const auto size = source.GetSize() / sizeof(half); + auto queue = Queue(queue_raw); + auto context = queue.GetContext(); + auto source_cpu = std::vector<half>(size); + source.Read(queue, size, source_cpu); + auto result_cpu = HalfToFloatBuffer(source_cpu); + auto result = Buffer<float>(context, size); + result.Write(queue, size, result_cpu); + return result; + } + void FloatToHalfBuffer(Buffer<half>& result, const Buffer<float>& source, RawCommandQueue queue_raw) { + const auto size = source.GetSize() / sizeof(float); + auto queue = Queue(queue_raw); + auto context = queue.GetContext(); + auto source_cpu = std::vector<float>(size); + source.Read(queue, size, source_cpu); + auto result_cpu = std::vector<half>(size); + FloatToHalfBuffer(result_cpu, source_cpu); + result.Write(queue, size, result_cpu); + } +#endif // ================================================================================================= } // namespace clblast diff --git a/test/test_utilities.hpp b/test/test_utilities.hpp index fe7a9cd2..d03c55fc 100644 --- a/test/test_utilities.hpp +++ b/test/test_utilities.hpp @@ -89,8 +89,25 @@ std::vector<float> HalfToFloatBuffer(const std::vector<half>& source); void FloatToHalfBuffer(std::vector<half>& result, const std::vector<float>& source); // As above, but now for OpenCL data-types instead of std::vectors -Buffer<float> HalfToFloatBuffer(const Buffer<half>& source, RawCommandQueue queue_raw); -void FloatToHalfBuffer(Buffer<half>& result, const Buffer<float>& source, RawCommandQueue queue_raw); +#ifdef OPENCL_API + 
Buffer<float> HalfToFloatBuffer(const Buffer<half>& source, RawCommandQueue queue_raw); + void FloatToHalfBuffer(Buffer<half>& result, const Buffer<float>& source, RawCommandQueue queue_raw); +#endif + +// ================================================================================================= + +// Creates a buffer but doesn't test it for validity. That's the reason this is not using the clpp11.h or +// cupp11.h interface. +template <typename T> +Buffer<T> CreateInvalidBuffer(const Context& context, const size_t size) { + #ifdef OPENCL_API + auto raw_buffer = clCreateBuffer(context(), CL_MEM_READ_WRITE, size * sizeof(T), nullptr, nullptr); + #elif CUDA_API + CUdeviceptr raw_buffer; + cuMemAlloc(&raw_buffer, size * sizeof(T)); + #endif + return Buffer<T>(raw_buffer); +} // ================================================================================================= } // namespace clblast |