summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorCedric Nugteren <web@cedricnugteren.nl>2018-07-31 21:49:53 +0200
committerGitHub <noreply@github.com>2018-07-31 21:49:53 +0200
commitbed10d273118e39ef49cf2aea7d69c4194a8384f (patch)
tree2e034a7c9751741bb6b2b03a0e619d2f54dcd4cf /src
parentd749c4af7201a462f3ee2f2105528d14a9efa4d1 (diff)
parent2b76bfee97fb775128126506f695535e38ea41ef (diff)
Merge pull request #308 from CNugteren/CLBlast-301-weird-AMD-Hainan-bug
Added workaround for AMD Southern Islands GPU issue
Diffstat (limited to 'src')
-rw-r--r--src/clpp11.hpp32
1 files changed, 29 insertions, 3 deletions
diff --git a/src/clpp11.hpp b/src/clpp11.hpp
index 8ac0523f..70da2329 100644
--- a/src/clpp11.hpp
+++ b/src/clpp11.hpp
@@ -447,8 +447,14 @@ class Program {
// Source-based constructor with memory management
explicit Program(const Context &context, const std::string &source) {
- const char *source_ptr = &source[0];
- const auto length = source.length();
+ #ifdef AMD_SI_EMPTY_KERNEL_WORKAROUND
+ const std::string source_null_kernel = source + "\n__kernel void null_kernel() {}\n";
+ const char *source_ptr = &source_null_kernel[0];
+ const auto length = source_null_kernel.length();
+ #else
+ const char *source_ptr = &source[0];
+ const auto length = source.length();
+ #endif
auto status = CL_SUCCESS;
program_ = clCreateProgramWithSource(context(), 1, &source_ptr, &length, &status);
CLCudaAPIError::Check(status, "clCreateProgramWithSource");
@@ -764,10 +770,21 @@ class Kernel {
kernel_(new cl_kernel, [](cl_kernel* k) {
if (*k) { CheckErrorDtor(clReleaseKernel(*k)); }
delete k;
- }) {
+ })
+ #ifdef AMD_SI_EMPTY_KERNEL_WORKAROUND
+ , null_kernel_(new cl_kernel, [](cl_kernel* k) {
+ if (*k) { CheckErrorDtor(clReleaseKernel(*k)); }
+ delete k;
+ })
+ #endif
+ {
auto status = CL_SUCCESS;
*kernel_ = clCreateKernel(program->operator()(), name.c_str(), &status);
CLCudaAPIError::Check(status, "clCreateKernel");
+ #ifdef AMD_SI_EMPTY_KERNEL_WORKAROUND
+ *null_kernel_ = clCreateKernel(program->operator()(), "null_kernel", &status);
+ CLCudaAPIError::Check(status, "clCreateKernel");
+ #endif
}
// Sets a kernel argument at the indicated position
@@ -831,12 +848,21 @@ class Kernel {
static_cast<cl_uint>(waitForEventsPlain.size()),
!waitForEventsPlain.empty() ? waitForEventsPlain.data() : nullptr,
event));
+ #ifdef AMD_SI_EMPTY_KERNEL_WORKAROUND
+ const std::vector<size_t> nullRange = {1};
+ CheckError(clEnqueueNDRangeKernel(queue(), *null_kernel_, static_cast<cl_uint>(nullRange.size()),
+ nullptr, nullRange.data(), nullptr,
+ 0, nullptr, nullptr));
+ #endif
}
// Accessor to the private data-member
const cl_kernel& operator()() const { return *kernel_; }
private:
std::shared_ptr<cl_kernel> kernel_;
+ #ifdef AMD_SI_EMPTY_KERNEL_WORKAROUND
+ std::shared_ptr<cl_kernel> null_kernel_;
+ #endif
// Internal implementation for the recursive SetArguments function.
template <typename T>