diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2017-04-03 21:46:07 +0200 |
---|---|---|
committer | Cedric Nugteren <web@cedricnugteren.nl> | 2017-04-03 21:46:07 +0200 |
commit | af9a521042ffc2823f60e12018db9e0a29120628 (patch) | |
tree | 5e5c1c6bf15c928da40ddb0f31c5fa20625cc052 /test/wrapper_cuda.hpp | |
parent | 0cebcbcc71d8c2d52b0d339fc75032e189d5946f (diff) |
Fixes the CUDA wrapper (now actually tested on a system with CUDA)
Diffstat (limited to 'test/wrapper_cuda.hpp')
-rw-r--r-- | test/wrapper_cuda.hpp | 20 |
1 file changed, 10 insertions, 10 deletions
diff --git a/test/wrapper_cuda.hpp b/test/wrapper_cuda.hpp index 0f24d0d9..509de9d1 100644 --- a/test/wrapper_cuda.hpp +++ b/test/wrapper_cuda.hpp @@ -22,7 +22,7 @@ #include "utilities/utilities.hpp" #ifdef CLBLAST_REF_CUBLAS - #include <cuda.h> + #include <cuda_runtime.h> #include <cublas_v2.h> #endif @@ -32,33 +32,33 @@ namespace clblast { // Copies data from the CUDA device to the host and frees-up the CUDA memory afterwards #ifdef CLBLAST_REF_CUBLAS template <typename T> - void CUDAToHost(const T* buffer_cuda, const std::vector<T> &buffer_host, const size_t size) { + void CUDAToHost(T* buffer_cuda, std::vector<T> &buffer_host, const size_t size) { cudaMemcpy( - std::reinterpret_cast<void*>(buffer_host.data()), - std::reinterpret_cast<void*>(buffer_cuda), + reinterpret_cast<void*>(buffer_host.data()), + reinterpret_cast<void*>(buffer_cuda), size*sizeof(T), cudaMemcpyDeviceToHost ); cudaFree(buffer_cuda); } #else - template <typename T> void CUDAToHost(const T*, const std::vector<T>&, const size_t) { } + template <typename T> void CUDAToHost(T*, const std::vector<T>&, const size_t) { } #endif // Allocates space on the CUDA device and copies in data from the host #ifdef CLBLAST_REF_CUBLAS template <typename T> - void HostToCUDA(const T* buffer_cuda, const std::vector<T> &buffer_host, const size_t size) { - cudaMalloc(std::reinterpret_cast<void**>&buffer_cuda, size*sizeof(T)); + void HostToCUDA(T* buffer_cuda, std::vector<T> &buffer_host, const size_t size) { + cudaMalloc(reinterpret_cast<void**>(&buffer_cuda), size*sizeof(T)); cudaMemcpy( - std::reinterpret_cast<void*>(buffer_cuda), - std::reinterpret_cast<void*>(buffer_host.data()), + reinterpret_cast<void*>(buffer_cuda), + reinterpret_cast<void*>(buffer_host.data()), size*sizeof(T), cudaMemcpyHostToDevice ); } #else - template <typename T> void HostToCUDA(const T*, const std::vector<T>&, const size_t) { } + template <typename T> void HostToCUDA(T*, const std::vector<T>&, const size_t) { } #endif // 
================================================================================================= |