From ed980a1df1482e188e1d579b5025e7c86a5ec65c Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sun, 24 Sep 2017 15:44:14 +0200 Subject: Updated database override function to work with the new database storage format --- test/correctness/misc/override_parameters.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'test/correctness') diff --git a/test/correctness/misc/override_parameters.cpp b/test/correctness/misc/override_parameters.cpp index 535d9286..95ece98c 100644 --- a/test/correctness/misc/override_parameters.cpp +++ b/test/correctness/misc/override_parameters.cpp @@ -37,6 +37,7 @@ size_t RunOverrideTests(int argc, char *argv[], const bool silent, const std::st const auto valid_settings = std::vector>{ { {"KWG",16}, {"KWI",2}, {"MDIMA",4}, {"MDIMC",4}, {"MWG",16}, {"NDIMB",4}, {"NDIMC",4}, {"NWG",16}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} }, { {"KWG",32}, {"KWI",2}, {"MDIMA",4}, {"MDIMC",4}, {"MWG",32}, {"NDIMB",4}, {"NDIMC",4}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} }, + { {"KWG",16}, {"KWI",2}, {"MDIMA",4}, {"MDIMC",4}, {"MWG",16}, {"NDIMB",4}, {"NDIMC",4}, {"NWG",16}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} }, }; const auto invalid_settings = std::vector>{ { {"KWI",2}, {"MDIMA",4}, {"MDIMC",4}, {"MWG",16}, {"NDIMB",4}, {"NDIMC",4}, {"NWG",16}, {"SA",0} }, -- cgit v1.2.3 From 74fd6767b93b03fc62462f44854215c4c320babe Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sun, 1 Oct 2017 20:36:56 +0200 Subject: GEMM tests now test both the in-direct and the direct kernels separately --- test/correctness/misc/override_parameters.cpp | 2 +- test/correctness/routines/level3/xgemm.cpp | 20 +++++++++++++++----- test/performance/routines/level3/xgemm.cpp | 10 +++++----- test/routines/level3/xgemm.hpp | 9 ++++++++- 4 files changed, 29 insertions(+), 12 deletions(-) (limited to 'test/correctness') diff --git a/test/correctness/misc/override_parameters.cpp 
b/test/correctness/misc/override_parameters.cpp index 95ece98c..05f40f57 100644 --- a/test/correctness/misc/override_parameters.cpp +++ b/test/correctness/misc/override_parameters.cpp @@ -28,7 +28,7 @@ size_t RunOverrideTests(int argc, char *argv[], const bool silent, const std::st auto arguments = RetrieveCommandLineArguments(argc, argv); auto errors = size_t{0}; auto passed = size_t{0}; - auto example_routine = TestXgemm(); + auto example_routine = TestXgemm<0, T>(); constexpr auto kSeed = 42; // fixed seed for reproducibility // Determines the test settings diff --git a/test/correctness/routines/level3/xgemm.cpp b/test/correctness/routines/level3/xgemm.cpp index 5de73554..bdf57b36 100644 --- a/test/correctness/routines/level3/xgemm.cpp +++ b/test/correctness/routines/level3/xgemm.cpp @@ -15,11 +15,21 @@ // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; - errors += clblast::RunTests, float, float>(argc, argv, false, "SGEMM"); - errors += clblast::RunTests, double, double>(argc, argv, true, "DGEMM"); - errors += clblast::RunTests, clblast::float2, clblast::float2>(argc, argv, true, "CGEMM"); - errors += clblast::RunTests, clblast::double2, clblast::double2>(argc, argv, true, "ZGEMM"); - errors += clblast::RunTests, clblast::half, clblast::half>(argc, argv, true, "HGEMM"); + + // Tests GEMM based on the 'in-direct' kernel + errors += clblast::RunTests, float, float>(argc, argv, false, "SGEMM"); + errors += clblast::RunTests, double, double>(argc, argv, true, "DGEMM"); + errors += clblast::RunTests, clblast::float2, clblast::float2>(argc, argv, true, "CGEMM"); + errors += clblast::RunTests, clblast::double2, clblast::double2>(argc, argv, true, "ZGEMM"); + errors += clblast::RunTests, clblast::half, clblast::half>(argc, argv, true, "HGEMM"); + + // Tests GEMM based on the 'direct' kernel + errors += clblast::RunTests, float, float>(argc, argv, true, "SGEMM"); + errors += clblast::RunTests, double, 
double>(argc, argv, true, "DGEMM"); + errors += clblast::RunTests, clblast::float2, clblast::float2>(argc, argv, true, "CGEMM"); + errors += clblast::RunTests, clblast::double2, clblast::double2>(argc, argv, true, "ZGEMM"); + errors += clblast::RunTests, clblast::half, clblast::half>(argc, argv, true, "HGEMM"); + if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/performance/routines/level3/xgemm.cpp b/test/performance/routines/level3/xgemm.cpp index 5b3426f5..0b67b4d3 100644 --- a/test/performance/routines/level3/xgemm.cpp +++ b/test/performance/routines/level3/xgemm.cpp @@ -17,15 +17,15 @@ int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); switch(clblast::GetPrecision(command_line_args, clblast::Precision::kSingle)) { case clblast::Precision::kHalf: - clblast::RunClient, clblast::half, clblast::half>(argc, argv); break; + clblast::RunClient, clblast::half, clblast::half>(argc, argv); break; case clblast::Precision::kSingle: - clblast::RunClient, float, float>(argc, argv); break; + clblast::RunClient, float, float>(argc, argv); break; case clblast::Precision::kDouble: - clblast::RunClient, double, double>(argc, argv); break; + clblast::RunClient, double, double>(argc, argv); break; case clblast::Precision::kComplexSingle: - clblast::RunClient, clblast::float2, clblast::float2>(argc, argv); break; + clblast::RunClient, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient, clblast::double2, clblast::double2>(argc, argv); break; + clblast::RunClient, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git a/test/routines/level3/xgemm.hpp b/test/routines/level3/xgemm.hpp index 7e0ead6d..1c430c1c 100644 --- a/test/routines/level3/xgemm.hpp +++ b/test/routines/level3/xgemm.hpp @@ -22,7 +22,7 @@ namespace clblast { // 
================================================================================================= // See comment at top of file for a description of the class -template +template // 'V' is the version of the kernel (0 for default, 1 for 'in-direct', 2 for 'direct') class TestXgemm { public: @@ -83,6 +83,13 @@ class TestXgemm { // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { + if (V != 0) { + const auto device = queue.GetDevice(); + const auto switch_threshold = (V == 1) ? size_t{0} : size_t{1024 * 1024 * 1024}; // large enough for tests + const auto override_status = OverrideParameters(device(), "KernelSelection", PrecisionValue(), + {{"XGEMM_MIN_INDIRECT_SIZE", switch_threshold}}); + if (override_status != StatusCode::kSuccess) { return override_status; } + } auto queue_plain = queue(); auto event = cl_event{}; auto status = Gemm(args.layout, args.a_transpose, args.b_transpose, -- cgit v1.2.3 From 9224da19ef384c1a7986587a682035905f63cf55 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Mon, 9 Oct 2017 20:06:25 +0200 Subject: Fixed the Python generator script w.r.t. 
the recent change of testing direct/in-direct GEMM kernels separately --- scripts/generator/generator/cpp.py | 15 ++++++++++----- scripts/generator/generator/datatype.py | 6 +++--- test/correctness/routines/level3/xgemm.cpp | 5 ----- 3 files changed, 13 insertions(+), 13 deletions(-) (limited to 'test/correctness') diff --git a/scripts/generator/generator/cpp.py b/scripts/generator/generator/cpp.py index 964b8f3e..5fef3083 100644 --- a/scripts/generator/generator/cpp.py +++ b/scripts/generator/generator/cpp.py @@ -364,7 +364,9 @@ def performance_test(routine, level_string): found = False for flavour in routine.flavours: if flavour.precision_name == precision: - result += NL + " clblast::RunClient(beta)[0], reinterpret_cast(beta)[1]}" return "beta" - def test_template(self): + def test_template(self, extra_template_argument): """Returns the template as used in the correctness/performance tests""" buffer_type = "clblast::" + self.buffer_type if self.is_non_standard() else self.buffer_type beta_cpp = "clblast::" + self.beta_cpp if self.beta_cpp in [D_HALF, D_FLOAT2, D_DOUBLE2] else self.beta_cpp if self.buffer_type != self.beta_cpp: - return "<" + buffer_type + "," + self.beta_cpp + ">, " + buffer_type + ", " + beta_cpp - return "<" + buffer_type + ">, " + buffer_type + ", " + beta_cpp + return "<" + extra_template_argument + buffer_type + "," + self.beta_cpp + ">, " + buffer_type + ", " + beta_cpp + return "<" + extra_template_argument + buffer_type + ">, " + buffer_type + ", " + beta_cpp def is_complex(self, scalar): """Current scalar is complex""" diff --git a/test/correctness/routines/level3/xgemm.cpp b/test/correctness/routines/level3/xgemm.cpp index bdf57b36..351e538b 100644 --- a/test/correctness/routines/level3/xgemm.cpp +++ b/test/correctness/routines/level3/xgemm.cpp @@ -15,21 +15,16 @@ // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; - - // Tests GEMM based on the 'in-direct' kernel errors += 
clblast::RunTests, float, float>(argc, argv, false, "SGEMM"); errors += clblast::RunTests, double, double>(argc, argv, true, "DGEMM"); errors += clblast::RunTests, clblast::float2, clblast::float2>(argc, argv, true, "CGEMM"); errors += clblast::RunTests, clblast::double2, clblast::double2>(argc, argv, true, "ZGEMM"); errors += clblast::RunTests, clblast::half, clblast::half>(argc, argv, true, "HGEMM"); - - // Tests GEMM based on the 'direct' kernel errors += clblast::RunTests, float, float>(argc, argv, true, "SGEMM"); errors += clblast::RunTests, double, double>(argc, argv, true, "DGEMM"); errors += clblast::RunTests, clblast::float2, clblast::float2>(argc, argv, true, "CGEMM"); errors += clblast::RunTests, clblast::double2, clblast::double2>(argc, argv, true, "ZGEMM"); errors += clblast::RunTests, clblast::half, clblast::half>(argc, argv, true, "HGEMM"); - if (errors > 0) { return 1; } else { return 0; } } -- cgit v1.2.3 From a3069a97c3e5c22635786870c8a9d02ca16d3d1d Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sun, 15 Oct 2017 13:56:19 +0200 Subject: Prepared test and client infrastructure for use with the CUDA API --- CMakeLists.txt | 6 +++++- test/correctness/testblas.cpp | 44 ++++++++++++++---------------------------- test/correctness/tester.hpp | 2 +- test/performance/client.hpp | 2 +- test/routines/level1/xaxpy.hpp | 21 +++++++++++++------- test/test_utilities.cpp | 44 ++++++++++++++++++++++-------------------- test/test_utilities.hpp | 21 ++++++++++++++++++-- 7 files changed, 78 insertions(+), 62 deletions(-) (limited to 'test/correctness') diff --git a/CMakeLists.txt b/CMakeLists.txt index d4e47215..e2f43f8e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -165,8 +165,10 @@ endif() # Locates the reference BLAS libraries in case the tests need to be compiled. The "FindclBLAS.cmake", # "FindCBLAS.cmake" and "FindcuBLAS.cmake" are included. 
if(CLIENTS OR TESTS) - find_package(clBLAS) find_package(CBLAS) + if(OPENCL) + find_package(clBLAS) + endif() if(CUBLAS) find_package(cuBLAS) endif() @@ -195,6 +197,8 @@ set(LEVEL3_ROUTINES xgemm xsymm xhemm xsyrk xherk xsyr2k xher2k xtrmm xtrsm) set(LEVELX_ROUTINES xomatcopy xim2col xaxpybatched xgemmbatched) set(ROUTINES ${LEVEL1_ROUTINES} ${LEVEL2_ROUTINES} ${LEVEL3_ROUTINES} ${LEVELX_ROUTINES}) set(PRECISIONS 32 64 3232 6464 16) + +# Sample programs if(OPENCL) set(SAMPLE_PROGRAMS_CPP sgemm sgemm_batched) set(SAMPLE_PROGRAMS_C sasum dgemv sgemm haxpy cache) diff --git a/test/correctness/testblas.cpp b/test/correctness/testblas.cpp index 659131c5..aa4b4785 100644 --- a/test/correctness/testblas.cpp +++ b/test/correctness/testblas.cpp @@ -241,36 +241,22 @@ void TestBlas::TestInvalid(std::vector> &test_vector, const st std::cout << std::flush; } - // Creates the OpenCL buffers. Note: we are not using the C++ version since we explicitly + // Creates the buffers. Note: we are not using the cxpp11.h C++ version since we explicitly // want to be able to create invalid buffers (no error checking here). 
- auto x1 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.x_size*sizeof(T), nullptr,nullptr); - auto y1 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.y_size*sizeof(T), nullptr,nullptr); - auto a1 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.a_size*sizeof(T), nullptr,nullptr); - auto b1 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.b_size*sizeof(T), nullptr,nullptr); - auto c1 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.c_size*sizeof(T), nullptr,nullptr); - auto ap1 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.ap_size*sizeof(T), nullptr,nullptr); - auto d1 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.scalar_size*sizeof(T), nullptr,nullptr); - auto x_vec1 = Buffer(x1); - auto y_vec1 = Buffer(y1); - auto a_mat1 = Buffer(a1); - auto b_mat1 = Buffer(b1); - auto c_mat1 = Buffer(c1); - auto ap_mat1 = Buffer(ap1); - auto scalar1 = Buffer(d1); - auto x2 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.x_size*sizeof(T), nullptr,nullptr); - auto y2 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.y_size*sizeof(T), nullptr,nullptr); - auto a2 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.a_size*sizeof(T), nullptr,nullptr); - auto b2 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.b_size*sizeof(T), nullptr,nullptr); - auto c2 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.c_size*sizeof(T), nullptr,nullptr); - auto ap2 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.ap_size*sizeof(T), nullptr,nullptr); - auto d2 = clCreateBuffer(context_(), CL_MEM_READ_WRITE, args.scalar_size*sizeof(T), nullptr,nullptr); - auto x_vec2 = Buffer(x2); - auto y_vec2 = Buffer(y2); - auto a_mat2 = Buffer(a2); - auto b_mat2 = Buffer(b2); - auto c_mat2 = Buffer(c2); - auto ap_mat2 = Buffer(ap2); - auto scalar2 = Buffer(d2); + auto x_vec1 = CreateInvalidBuffer(context_, args.x_size); + auto y_vec1 = CreateInvalidBuffer(context_, args.y_size); + auto a_mat1 = CreateInvalidBuffer(context_, args.a_size); + auto b_mat1 = 
CreateInvalidBuffer(context_, args.b_size); + auto c_mat1 = CreateInvalidBuffer(context_, args.c_size); + auto ap_mat1 = CreateInvalidBuffer(context_, args.ap_size); + auto scalar1 = CreateInvalidBuffer(context_, args.scalar_size); + auto x_vec2 = CreateInvalidBuffer(context_, args.x_size); + auto y_vec2 = CreateInvalidBuffer(context_, args.y_size); + auto a_mat2 = CreateInvalidBuffer(context_, args.a_size); + auto b_mat2 = CreateInvalidBuffer(context_, args.b_size); + auto c_mat2 = CreateInvalidBuffer(context_, args.c_size); + auto ap_mat2 = CreateInvalidBuffer(context_, args.ap_size); + auto scalar2 = CreateInvalidBuffer(context_, args.scalar_size); auto buffers1 = Buffers{x_vec1, y_vec1, a_mat1, b_mat1, c_mat1, ap_mat1, scalar1}; auto buffers2 = Buffers{x_vec2, y_vec2, a_mat2, b_mat2, c_mat2, ap_mat2, scalar2}; diff --git a/test/correctness/tester.hpp b/test/correctness/tester.hpp index caf03787..640f870a 100644 --- a/test/correctness/tester.hpp +++ b/test/correctness/tester.hpp @@ -22,13 +22,13 @@ #include #include +#include "utilities/utilities.hpp" #include "test/test_utilities.hpp" // The libraries #ifdef CLBLAST_REF_CLBLAS #include #endif -#include "clblast.h" namespace clblast { // ================================================================================================= diff --git a/test/performance/client.hpp b/test/performance/client.hpp index 2ba09cb9..0b6176c8 100644 --- a/test/performance/client.hpp +++ b/test/performance/client.hpp @@ -32,7 +32,7 @@ #include #endif #include "test/wrapper_cuda.hpp" -#include "clblast.h" +#include "utilities/utilities.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level1/xaxpy.hpp b/test/routines/level1/xaxpy.hpp index 17cae6ad..cdceb4c7 100644 --- a/test/routines/level1/xaxpy.hpp +++ b/test/routines/level1/xaxpy.hpp @@ -70,13 +70,20 @@ class TestXaxpy { // Describes how to run the CLBlast routine static 
StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = Axpy(args.n, args.alpha, - buffers.x_vec(), args.x_offset, args.x_inc, - buffers.y_vec(), args.y_offset, args.y_inc, - &queue_plain, &event); - if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #ifdef OPENCL_API + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = Axpy(args.n, args.alpha, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.y_vec(), args.y_offset, args.y_inc, + &queue_plain, &event); + if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #elif CUDA_API + auto status = Axpy(args.n, args.alpha, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.y_vec(), args.y_offset, args.y_inc, + queue.GetContext()(), queue.GetDevice()()); + #endif return status; } diff --git a/test/test_utilities.cpp b/test/test_utilities.cpp index 579eb61c..84f8894f 100644 --- a/test/test_utilities.cpp +++ b/test/test_utilities.cpp @@ -88,27 +88,29 @@ void FloatToHalfBuffer(std::vector& result, const std::vector& sour } // As above, but now for OpenCL data-types instead of std::vectors -Buffer HalfToFloatBuffer(const Buffer& source, RawCommandQueue queue_raw) { - const auto size = source.GetSize() / sizeof(half); - auto queue = Queue(queue_raw); - auto context = queue.GetContext(); - auto source_cpu = std::vector(size); - source.Read(queue, size, source_cpu); - auto result_cpu = HalfToFloatBuffer(source_cpu); - auto result = Buffer(context, size); - result.Write(queue, size, result_cpu); - return result; -} -void FloatToHalfBuffer(Buffer& result, const Buffer& source, RawCommandQueue queue_raw) { - const auto size = source.GetSize() / sizeof(float); - auto queue = Queue(queue_raw); - auto context = queue.GetContext(); - auto source_cpu = std::vector(size); - source.Read(queue, size, source_cpu); - auto result_cpu = 
std::vector(size); - FloatToHalfBuffer(result_cpu, source_cpu); - result.Write(queue, size, result_cpu); -} +#ifdef OPENCL_API + Buffer HalfToFloatBuffer(const Buffer& source, RawCommandQueue queue_raw) { + const auto size = source.GetSize() / sizeof(half); + auto queue = Queue(queue_raw); + auto context = queue.GetContext(); + auto source_cpu = std::vector(size); + source.Read(queue, size, source_cpu); + auto result_cpu = HalfToFloatBuffer(source_cpu); + auto result = Buffer(context, size); + result.Write(queue, size, result_cpu); + return result; + } + void FloatToHalfBuffer(Buffer& result, const Buffer& source, RawCommandQueue queue_raw) { + const auto size = source.GetSize() / sizeof(float); + auto queue = Queue(queue_raw); + auto context = queue.GetContext(); + auto source_cpu = std::vector(size); + source.Read(queue, size, source_cpu); + auto result_cpu = std::vector(size); + FloatToHalfBuffer(result_cpu, source_cpu); + result.Write(queue, size, result_cpu); + } +#endif // ================================================================================================= } // namespace clblast diff --git a/test/test_utilities.hpp b/test/test_utilities.hpp index fe7a9cd2..d03c55fc 100644 --- a/test/test_utilities.hpp +++ b/test/test_utilities.hpp @@ -89,8 +89,25 @@ std::vector HalfToFloatBuffer(const std::vector& source); void FloatToHalfBuffer(std::vector& result, const std::vector& source); // As above, but now for OpenCL data-types instead of std::vectors -Buffer HalfToFloatBuffer(const Buffer& source, RawCommandQueue queue_raw); -void FloatToHalfBuffer(Buffer& result, const Buffer& source, RawCommandQueue queue_raw); +#ifdef OPENCL_API + Buffer HalfToFloatBuffer(const Buffer& source, RawCommandQueue queue_raw); + void FloatToHalfBuffer(Buffer& result, const Buffer& source, RawCommandQueue queue_raw); +#endif + +// ================================================================================================= + +// Creates a buffer but don't test for 
validity. That's the reason this is not using the clpp11.h or +// cupp11.h interface. +template +Buffer CreateInvalidBuffer(const Context& context, const size_t size) { + #ifdef OPENCL_API + auto raw_buffer = clCreateBuffer(context(), CL_MEM_READ_WRITE, size * sizeof(T), nullptr, nullptr); + #elif CUDA_API + CUdeviceptr raw_buffer; + cuMemAlloc(&raw_buffer, size * sizeof(T)); + #endif + return Buffer(raw_buffer); +} // ================================================================================================= } // namespace clblast -- cgit v1.2.3