From e8dea34fcee36b6a10762653d3dcaadcb436cb80 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Wed, 25 Jul 2018 22:59:36 +0200 Subject: Added workaround for weird AMD SI Hainan bug --- src/clpp11.hpp | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/clpp11.hpp b/src/clpp11.hpp index 8ac0523f..45ef52d9 100644 --- a/src/clpp11.hpp +++ b/src/clpp11.hpp @@ -447,8 +447,15 @@ class Program { // Source-based constructor with memory management explicit Program(const Context &context, const std::string &source) { - const char *source_ptr = &source[0]; - const auto length = source.length(); + #ifdef AMD_HAINAN_WORKAROUND + const std::string source_hainan = source + "\n__kernel void null_kernel() {}\n"; + const char *source_ptr = &source_hainan[0]; + const auto length = source_hainan.length(); + printf("%s\n", source_hainan.c_str()); + #else + const char *source_ptr = &source[0]; + const auto length = source.length(); + #endif auto status = CL_SUCCESS; program_ = clCreateProgramWithSource(context(), 1, &source_ptr, &length, &status); CLCudaAPIError::Check(status, "clCreateProgramWithSource"); @@ -768,6 +775,10 @@ class Kernel { auto status = CL_SUCCESS; *kernel_ = clCreateKernel(program->operator()(), name.c_str(), &status); CLCudaAPIError::Check(status, "clCreateKernel"); + #ifdef AMD_HAINAN_WORKAROUND + *null_kernel_ = clCreateKernel(program->operator()(), "null_kernel", &status); + CLCudaAPIError::Check(status, "clCreateKernel"); + #endif } // Sets a kernel argument at the indicated position @@ -831,12 +842,22 @@ class Kernel { static_cast(waitForEventsPlain.size()), !waitForEventsPlain.empty() ? waitForEventsPlain.data() : nullptr, event)); + #ifdef AMD_HAINAN_WORKAROUND + const std::vector nullRange = {1}; + CheckError(clEnqueueNDRangeKernel(queue(), *null_kernel_, static_cast(nullRange.size()), + nullptr, nullRange.data(), nullptr, + static_cast(waitForEventsPlain.size()), + nullptr, event)); + #endif } // Accessor to the private data-member const cl_kernel& operator()() const { return *kernel_; } private: std::shared_ptr kernel_; + #ifdef AMD_HAINAN_WORKAROUND + std::shared_ptr null_kernel_; + #endif // Internal implementation for the recursive SetArguments function. template -- cgit v1.2.3 From f84036948b82d5f723f7f82bc2f6c08ea2f891e8 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Fri, 27 Jul 2018 20:38:01 +0200 Subject: Renamed AMD SI workaround defines --- CMakeLists.txt | 6 +++--- src/clpp11.hpp | 9 ++++----- 2 files changed, 7 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/CMakeLists.txt b/CMakeLists.txt index 2a26193d..fb62ae27 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -36,9 +36,9 @@ option(NETLIB "Enable compilation of the CBLAS Netlib API" OFF) option(CUBLAS "Enables performance comparison against cuBLAS on NVIDIA GPUs" OFF) # Workarounds for bugs -option(AMD_HAINAN "Enables workaround for bug in AMD SI Hainan GPUs" OFF) -if(AMD_HAINAN) - add_definitions(-DAMD_HAINAN_WORKAROUND) +option(AMD_SI_EMPTY_KERNEL_WORKAROUND "Enables workaround for bug in AMD Southern Island GPUs" OFF) +if(AMD_SI_EMPTY_KERNEL_WORKAROUND) + add_definitions(-DAMD_SI_EMPTY_KERNEL_WORKAROUND) endif() # Select between an OpenCL API (default) or a CUDA API (beta) diff --git a/src/clpp11.hpp b/src/clpp11.hpp index 45ef52d9..093214b3 100644 --- a/src/clpp11.hpp +++ b/src/clpp11.hpp @@ -447,11 +447,10 @@ class Program { // Source-based constructor with memory management explicit Program(const Context &context, const std::string &source) { - #ifdef AMD_HAINAN_WORKAROUND + #ifdef AMD_SI_EMPTY_KERNEL_WORKAROUND const std::string source_hainan = source + "\n__kernel void null_kernel() {}\n"; const char *source_ptr = &source_hainan[0]; const auto length = source_hainan.length(); - printf("%s\n", source_hainan.c_str()); #else const char *source_ptr = &source[0]; const auto length = source.length(); @@ -775,7 +774,7 @@ class Kernel { auto status = CL_SUCCESS; *kernel_ = clCreateKernel(program->operator()(), name.c_str(), &status); CLCudaAPIError::Check(status, "clCreateKernel"); - #ifdef AMD_HAINAN_WORKAROUND + #ifdef AMD_SI_EMPTY_KERNEL_WORKAROUND *null_kernel_ = clCreateKernel(program->operator()(), "null_kernel", &status); CLCudaAPIError::Check(status, "clCreateKernel"); #endif @@ -842,7 +841,7 @@ class Kernel { static_cast(waitForEventsPlain.size()), !waitForEventsPlain.empty() ? waitForEventsPlain.data() : nullptr, event)); - #ifdef AMD_HAINAN_WORKAROUND + #ifdef AMD_SI_EMPTY_KERNEL_WORKAROUND const std::vector nullRange = {1}; CheckError(clEnqueueNDRangeKernel(queue(), *null_kernel_, static_cast(nullRange.size()), nullptr, nullRange.data(), nullptr, @@ -855,7 +854,7 @@ class Kernel { const cl_kernel& operator()() const { return *kernel_; } private: std::shared_ptr kernel_; - #ifdef AMD_HAINAN_WORKAROUND + #ifdef AMD_SI_EMPTY_KERNEL_WORKAROUND std::shared_ptr null_kernel_; #endif -- cgit v1.2.3 From 429ff070f84212a202091c9173bd5d754dc05b51 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Fri, 27 Jul 2018 20:53:24 +0200 Subject: Fixed a bug: forgot to initialize the shared pointer for the null kernel --- src/clpp11.hpp | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/clpp11.hpp b/src/clpp11.hpp index 093214b3..2d411a41 100644 --- a/src/clpp11.hpp +++ b/src/clpp11.hpp @@ -448,9 +448,9 @@ class Program { // Source-based constructor with memory management explicit Program(const Context &context, const std::string &source) { #ifdef AMD_SI_EMPTY_KERNEL_WORKAROUND - const std::string source_hainan = source + "\n__kernel void null_kernel() {}\n"; - const char *source_ptr = &source_hainan[0]; - const auto length = source_hainan.length(); + const std::string source_null_kernel = source + "\n__kernel void null_kernel() {}\n"; + const char *source_ptr = &source_null_kernel[0]; + const auto length = source_null_kernel.length(); #else const char *source_ptr = &source[0]; const auto length = source.length(); @@ -770,7 +770,14 @@ class Kernel { kernel_(new cl_kernel, [](cl_kernel* k) { if (*k) { CheckErrorDtor(clReleaseKernel(*k)); } delete k; - }) { + }) + #ifdef AMD_SI_EMPTY_KERNEL_WORKAROUND + , null_kernel_(new cl_kernel, [](cl_kernel* k) { + if (*k) { CheckErrorDtor(clReleaseKernel(*k)); } + delete k; + }) + #endif + { auto status = CL_SUCCESS; *kernel_ = clCreateKernel(program->operator()(), name.c_str(), &status); CLCudaAPIError::Check(status, "clCreateKernel"); -- cgit v1.2.3 From 2b76bfee97fb775128126506f695535e38ea41ef Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sun, 29 Jul 2018 22:16:27 +0200 Subject: Fixed a wrong event issue causing error -57 --- src/clpp11.hpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src') diff --git a/src/clpp11.hpp b/src/clpp11.hpp index 2d411a41..70da2329 100644 --- a/src/clpp11.hpp +++ b/src/clpp11.hpp @@ -852,8 +852,7 @@ class Kernel { const std::vector nullRange = {1}; CheckError(clEnqueueNDRangeKernel(queue(), *null_kernel_, static_cast(nullRange.size()), nullptr, nullRange.data(), nullptr, - static_cast(waitForEventsPlain.size()), - nullptr, event)); + 0, nullptr, nullptr)); #endif } -- cgit v1.2.3