diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2017-04-16 17:53:51 +0200 |
---|---|---|
committer | Cedric Nugteren <web@cedricnugteren.nl> | 2017-04-16 17:53:51 +0200 |
commit | e3bb58f60277e70a26b2cef782945027871135d5 (patch) | |
tree | 5db934bba015b9fe16a5c52958eaa30431929484 | |
parent | f7f8ec644f51d16f888b6a7086009b79c0beef8f (diff) |
Finalized support for performance testing against cuBLAS
-rw-r--r-- | CHANGELOG | 1 | ||||
-rw-r--r-- | CMakeLists.txt | 11 | ||||
-rw-r--r-- | README.md | 2 | ||||
-rw-r--r-- | test/wrapper_cuda.hpp | 4 |
4 files changed, 11 insertions, 7 deletions
@@ -8,6 +8,7 @@ Development version (next release)
 - Fixed bugs in the half-precision routines HTBMV/HTPMV/HTRMV/HSYR2K/HTRMM
 - Tests now also exit with an error code when OpenCL errors or compilation errors occur
 - Tests now also check for the L2 error in case of half-precision
+- Clients can now test against cuBLAS on NVIDIA systems for performance comparisons (-DCUBLAS=ON)
 - Replaced the R graph scripts with Python/Matplotlib scripts
 - Various minor fixes and enhancements
 - Added tuned parameters for various devices (see README)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0fb04071..b26de79a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -28,6 +28,7 @@ option(TUNERS "Enable compilation of the tuners" OFF)
 option(CLIENTS "Enable compilation of the clients to test and compare performance" OFF)
 option(TESTS "Enable compilation of the correctness tests" OFF)
 option(NETLIB "Enable compilation of the CBLAS Netlib API" OFF)
+option(CUBLAS "Enables performance comparison against cuBLAS on NVIDIA GPUs" OFF)
 
 # Compile in verbose mode with additional diagnostic messages
 option(VERBOSE "Compile in verbose mode for additional diagnostic messages" OFF)
@@ -134,14 +135,16 @@ endif()
 if(CLIENTS OR TESTS)
   find_package(clBLAS)
   find_package(CBLAS)
-  find_package(cuBLAS)
-  if(NOT CLBLAS_FOUND AND NOT CBLAS_FOUND AND NOT CUBLAS_FOUND)
+  if(CUBLAS)
+    find_package(cuBLAS)
+  endif()
+  if(NOT CLBLAS_FOUND AND NOT CBLAS_FOUND)
     if(TESTS)
-      message(STATUS "Could NOT find clBLAS nor a CPU BLAS nor cuBLAS, disabling the compilation of the tests")
+      message(STATUS "Could NOT find clBLAS nor a CPU BLAS, disabling the compilation of the tests")
       set(TESTS OFF)
     endif()
     if(CLIENTS)
-      message(STATUS "Could NOT find clBLAS nor a CPU BLAS nor cuBLAS, head-to-head performance comparison not supported in the clients")
+      message(STATUS "Could NOT find clBLAS nor a CPU BLAS, head-to-head performance comparison not supported in the clients")
     endif()
   endif()
 endif()
@@ -199,7 +199,7 @@ All tests can be run directly together in one go through the `make alltests` tar
 Compiling the performance tests/clients (optional)
 -------------
 
-To test the performance of CLBlast and compare optionally against [clBLAS](http://github.com/clMathLibraries/clBLAS) or a CPU BLAS library (see above for requirements), compile with the clients enabled by specifying `-DCLIENTS=ON`, for example as follows:
+To test the performance of CLBlast and compare optionally against [clBLAS](http://github.com/clMathLibraries/clBLAS), cuBLAS (if testing on an NVIDIA GPU and `-DCUBLAS=ON` set), or a CPU BLAS library (see above for requirements), compile with the clients enabled by specifying `-DCLIENTS=ON`, for example as follows:
 
     cmake -DCLIENTS=ON ..
 
diff --git a/test/wrapper_cuda.hpp b/test/wrapper_cuda.hpp
index 51f897c4..c97ae3ef 100644
--- a/test/wrapper_cuda.hpp
+++ b/test/wrapper_cuda.hpp
@@ -72,7 +72,7 @@ namespace clblast {
     *buffer_cuda = nullptr;
   }
 #else
-  template <typename T> void CUDAToHost(T*, const std::vector<T>&, const size_t) { }
+  template <typename T> void CUDAToHost(T**, const std::vector<T>&, const size_t) { }
 #endif
 
 // Allocates space on the CUDA device and copies in data from the host
@@ -96,7 +96,7 @@ namespace clblast {
     }
   }
 #else
-  template <typename T> void HostToCUDA(T*, const std::vector<T>&, const size_t) { }
+  template <typename T> void HostToCUDA(T**, const std::vector<T>&, const size_t) { }
 #endif
 
 // =================================================================================================