summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Cedric Nugteren <web@cedricnugteren.nl> 2017-10-15 13:56:19 +0200
committer: Cedric Nugteren <web@cedricnugteren.nl> 2017-10-15 13:56:19 +0200
commit: a3069a97c3e5c22635786870c8a9d02ca16d3d1d (patch)
tree: 862676b6695e82d275d6f98ed4ed350186c00a82
parent: 7408da174c848ffeaa1fe2da52f26a057e65b0f1 (diff)
Prepared test and client infrastructure for use with the CUDA API
-rw-r--r--CMakeLists.txt6
-rw-r--r--test/correctness/testblas.cpp44
-rw-r--r--test/correctness/tester.hpp2
-rw-r--r--test/performance/client.hpp2
-rw-r--r--test/routines/level1/xaxpy.hpp21
-rw-r--r--test/test_utilities.cpp44
-rw-r--r--test/test_utilities.hpp21
7 files changed, 78 insertions, 62 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index d4e47215..e2f43f8e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -165,8 +165,10 @@ endif()
# Locates the reference BLAS libraries in case the tests need to be compiled. The "FindclBLAS.cmake",
# "FindCBLAS.cmake" and "FindcuBLAS.cmake" are included.
if(CLIENTS OR TESTS)
- find_package(clBLAS)
find_package(CBLAS)
+ if(OPENCL)
+ find_package(clBLAS)
+ endif()
if(CUBLAS)
find_package(cuBLAS)
endif()
@@ -195,6 +197,8 @@ set(LEVEL3_ROUTINES xgemm xsymm xhemm xsyrk xherk xsyr2k xher2k xtrmm xtrsm)
set(LEVELX_ROUTINES xomatcopy xim2col xaxpybatched xgemmbatched)
set(ROUTINES ${LEVEL1_ROUTINES} ${LEVEL2_ROUTINES} ${LEVEL3_ROUTINES} ${LEVELX_ROUTINES})
set(PRECISIONS 32 64 3232 6464 16)
+
+# Sample programs
if(OPENCL)
set(SAMPLE_PROGRAMS_CPP sgemm sgemm_batched)
set(SAMPLE_PROGRAMS_C sasum dgemv sgemm haxpy cache)
diff --git a/test/correctness/testblas.cpp b/test/correctness/testblas.cpp
index 659131c5..aa4b4785 100644
--- a/test/correctness/testblas.cpp
+++ b/test/correctness/testblas.cpp
@@ -241,36 +241,22 @@ void TestBlas<T,U>::TestInvalid(std::vector<Arguments<U>> &test_vector, const st
std::cout << std::flush;
}
- // Creates the OpenCL buffers. Note: we are not using the C++ version since we explicitly
+ // Creates the buffers. Note: we are not using the clpp11.h/cupp11.h C++ wrappers since we explicitly
// want to be able to create invalid buffers (no error checking here).
- auto x1 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.x_size*sizeof(T), nullptr,nullptr);
- auto y1 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.y_size*sizeof(T), nullptr,nullptr);
- auto a1 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.a_size*sizeof(T), nullptr,nullptr);
- auto b1 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.b_size*sizeof(T), nullptr,nullptr);
- auto c1 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.c_size*sizeof(T), nullptr,nullptr);
- auto ap1 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.ap_size*sizeof(T), nullptr,nullptr);
- auto d1 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.scalar_size*sizeof(T), nullptr,nullptr);
- auto x_vec1 = Buffer<T>(x1);
- auto y_vec1 = Buffer<T>(y1);
- auto a_mat1 = Buffer<T>(a1);
- auto b_mat1 = Buffer<T>(b1);
- auto c_mat1 = Buffer<T>(c1);
- auto ap_mat1 = Buffer<T>(ap1);
- auto scalar1 = Buffer<T>(d1);
- auto x2 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.x_size*sizeof(T), nullptr,nullptr);
- auto y2 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.y_size*sizeof(T), nullptr,nullptr);
- auto a2 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.a_size*sizeof(T), nullptr,nullptr);
- auto b2 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.b_size*sizeof(T), nullptr,nullptr);
- auto c2 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.c_size*sizeof(T), nullptr,nullptr);
- auto ap2 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.ap_size*sizeof(T), nullptr,nullptr);
- auto d2 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.scalar_size*sizeof(T), nullptr,nullptr);
- auto x_vec2 = Buffer<T>(x2);
- auto y_vec2 = Buffer<T>(y2);
- auto a_mat2 = Buffer<T>(a2);
- auto b_mat2 = Buffer<T>(b2);
- auto c_mat2 = Buffer<T>(c2);
- auto ap_mat2 = Buffer<T>(ap2);
- auto scalar2 = Buffer<T>(d2);
+ auto x_vec1 = CreateInvalidBuffer<T>(context_, args.x_size);
+ auto y_vec1 = CreateInvalidBuffer<T>(context_, args.y_size);
+ auto a_mat1 = CreateInvalidBuffer<T>(context_, args.a_size);
+ auto b_mat1 = CreateInvalidBuffer<T>(context_, args.b_size);
+ auto c_mat1 = CreateInvalidBuffer<T>(context_, args.c_size);
+ auto ap_mat1 = CreateInvalidBuffer<T>(context_, args.ap_size);
+ auto scalar1 = CreateInvalidBuffer<T>(context_, args.scalar_size);
+ auto x_vec2 = CreateInvalidBuffer<T>(context_, args.x_size);
+ auto y_vec2 = CreateInvalidBuffer<T>(context_, args.y_size);
+ auto a_mat2 = CreateInvalidBuffer<T>(context_, args.a_size);
+ auto b_mat2 = CreateInvalidBuffer<T>(context_, args.b_size);
+ auto c_mat2 = CreateInvalidBuffer<T>(context_, args.c_size);
+ auto ap_mat2 = CreateInvalidBuffer<T>(context_, args.ap_size);
+ auto scalar2 = CreateInvalidBuffer<T>(context_, args.scalar_size);
auto buffers1 = Buffers<T>{x_vec1, y_vec1, a_mat1, b_mat1, c_mat1, ap_mat1, scalar1};
auto buffers2 = Buffers<T>{x_vec2, y_vec2, a_mat2, b_mat2, c_mat2, ap_mat2, scalar2};
diff --git a/test/correctness/tester.hpp b/test/correctness/tester.hpp
index caf03787..640f870a 100644
--- a/test/correctness/tester.hpp
+++ b/test/correctness/tester.hpp
@@ -22,13 +22,13 @@
#include <vector>
#include <memory>
+#include "utilities/utilities.hpp"
#include "test/test_utilities.hpp"
// The libraries
#ifdef CLBLAST_REF_CLBLAS
#include <clBLAS.h>
#endif
-#include "clblast.h"
namespace clblast {
// =================================================================================================
diff --git a/test/performance/client.hpp b/test/performance/client.hpp
index 2ba09cb9..0b6176c8 100644
--- a/test/performance/client.hpp
+++ b/test/performance/client.hpp
@@ -32,7 +32,7 @@
#include <clBLAS.h>
#endif
#include "test/wrapper_cuda.hpp"
-#include "clblast.h"
+#include "utilities/utilities.hpp"
namespace clblast {
// =================================================================================================
diff --git a/test/routines/level1/xaxpy.hpp b/test/routines/level1/xaxpy.hpp
index 17cae6ad..cdceb4c7 100644
--- a/test/routines/level1/xaxpy.hpp
+++ b/test/routines/level1/xaxpy.hpp
@@ -70,13 +70,20 @@ class TestXaxpy {
// Describes how to run the CLBlast routine
static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
- auto queue_plain = queue();
- auto event = cl_event{};
- auto status = Axpy(args.n, args.alpha,
- buffers.x_vec(), args.x_offset, args.x_inc,
- buffers.y_vec(), args.y_offset, args.y_inc,
- &queue_plain, &event);
- if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); }
+ #ifdef OPENCL_API
+ auto queue_plain = queue();
+ auto event = cl_event{};
+ auto status = Axpy(args.n, args.alpha,
+ buffers.x_vec(), args.x_offset, args.x_inc,
+ buffers.y_vec(), args.y_offset, args.y_inc,
+ &queue_plain, &event);
+ if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); }
+ #elif CUDA_API
+ auto status = Axpy(args.n, args.alpha,
+ buffers.x_vec(), args.x_offset, args.x_inc,
+ buffers.y_vec(), args.y_offset, args.y_inc,
+ queue.GetContext()(), queue.GetDevice()());
+ #endif
return status;
}
diff --git a/test/test_utilities.cpp b/test/test_utilities.cpp
index 579eb61c..84f8894f 100644
--- a/test/test_utilities.cpp
+++ b/test/test_utilities.cpp
@@ -88,27 +88,29 @@ void FloatToHalfBuffer(std::vector<half>& result, const std::vector<float>& sour
}
// As above, but now for OpenCL data-types instead of std::vectors
-Buffer<float> HalfToFloatBuffer(const Buffer<half>& source, RawCommandQueue queue_raw) {
- const auto size = source.GetSize() / sizeof(half);
- auto queue = Queue(queue_raw);
- auto context = queue.GetContext();
- auto source_cpu = std::vector<half>(size);
- source.Read(queue, size, source_cpu);
- auto result_cpu = HalfToFloatBuffer(source_cpu);
- auto result = Buffer<float>(context, size);
- result.Write(queue, size, result_cpu);
- return result;
-}
-void FloatToHalfBuffer(Buffer<half>& result, const Buffer<float>& source, RawCommandQueue queue_raw) {
- const auto size = source.GetSize() / sizeof(float);
- auto queue = Queue(queue_raw);
- auto context = queue.GetContext();
- auto source_cpu = std::vector<float>(size);
- source.Read(queue, size, source_cpu);
- auto result_cpu = std::vector<half>(size);
- FloatToHalfBuffer(result_cpu, source_cpu);
- result.Write(queue, size, result_cpu);
-}
+#ifdef OPENCL_API
+ Buffer<float> HalfToFloatBuffer(const Buffer<half>& source, RawCommandQueue queue_raw) {
+ const auto size = source.GetSize() / sizeof(half);
+ auto queue = Queue(queue_raw);
+ auto context = queue.GetContext();
+ auto source_cpu = std::vector<half>(size);
+ source.Read(queue, size, source_cpu);
+ auto result_cpu = HalfToFloatBuffer(source_cpu);
+ auto result = Buffer<float>(context, size);
+ result.Write(queue, size, result_cpu);
+ return result;
+ }
+ void FloatToHalfBuffer(Buffer<half>& result, const Buffer<float>& source, RawCommandQueue queue_raw) {
+ const auto size = source.GetSize() / sizeof(float);
+ auto queue = Queue(queue_raw);
+ auto context = queue.GetContext();
+ auto source_cpu = std::vector<float>(size);
+ source.Read(queue, size, source_cpu);
+ auto result_cpu = std::vector<half>(size);
+ FloatToHalfBuffer(result_cpu, source_cpu);
+ result.Write(queue, size, result_cpu);
+ }
+#endif
// =================================================================================================
} // namespace clblast
diff --git a/test/test_utilities.hpp b/test/test_utilities.hpp
index fe7a9cd2..d03c55fc 100644
--- a/test/test_utilities.hpp
+++ b/test/test_utilities.hpp
@@ -89,8 +89,25 @@ std::vector<float> HalfToFloatBuffer(const std::vector<half>& source);
void FloatToHalfBuffer(std::vector<half>& result, const std::vector<float>& source);
// As above, but now for OpenCL data-types instead of std::vectors
-Buffer<float> HalfToFloatBuffer(const Buffer<half>& source, RawCommandQueue queue_raw);
-void FloatToHalfBuffer(Buffer<half>& result, const Buffer<float>& source, RawCommandQueue queue_raw);
+#ifdef OPENCL_API
+ Buffer<float> HalfToFloatBuffer(const Buffer<half>& source, RawCommandQueue queue_raw);
+ void FloatToHalfBuffer(Buffer<half>& result, const Buffer<float>& source, RawCommandQueue queue_raw);
+#endif
+
+// =================================================================================================
+
+// Creates a buffer without testing it for validity. That is the reason this does not use the
+// clpp11.h or cupp11.h interface.
+template <typename T>
+Buffer<T> CreateInvalidBuffer(const Context& context, const size_t size) {
+ #ifdef OPENCL_API
+ auto raw_buffer = clCreateBuffer(context(), CL_MEM_READ_WRITE, size * sizeof(T), nullptr, nullptr);
+ #elif CUDA_API
+ CUdeviceptr raw_buffer;
+ cuMemAlloc(&raw_buffer, size * sizeof(T));
+ #endif
+ return Buffer<T>(raw_buffer);
+}
// =================================================================================================
} // namespace clblast