diff options
author | cnugteren <web@cedricnugteren.nl> | 2016-05-15 16:10:56 +0200 |
---|---|---|
committer | cnugteren <web@cedricnugteren.nl> | 2016-05-15 16:10:56 +0200 |
commit | 716d7c67d91ef61e3d71e219f61c72859ac823eb (patch) | |
tree | 543af924d3e5557a05a434d332cc6205ee0f07ed | |
parent | 9e36b3b20d5bc69e8744e76f347a3f5e1345778a (diff) |
Fixed a bug in the xGEMM routine related to the event incorrectly set
-rw-r--r-- | CHANGELOG | 1 |
-rw-r--r-- | src/routines/level3/xgemm.cc | 5 |
-rw-r--r-- | test/correctness/tester.cc | 2 |
3 files changed, 5 insertions, 3 deletions
@@ -1,6 +1,7 @@
 
 Development version (next release)
 - Improved performance of large power-of-2 xGEMM kernels for AMD GPUs
+- Fixed a bug in the xGEMM routine related to the event incorrectly set
 
 Version 0.7.0
 - Added exports to be able to create a DLL on Windows (thanks to Marco Hutter)
diff --git a/src/routines/level3/xgemm.cc b/src/routines/level3/xgemm.cc
index aa081e81..3699b548 100644
--- a/src/routines/level3/xgemm.cc
+++ b/src/routines/level3/xgemm.cc
@@ -184,12 +184,13 @@ StatusCode Xgemm<T>::DoGemm(const Layout layout,
 
       // Launches the kernel
       auto eventKernel = Event();
-      status = RunKernel(kernel, global, local, eventKernel.pointer(), eventWaitList);
+      auto eventPointer = (!c_no_temp) ? eventKernel.pointer() : event_;
+      status = RunKernel(kernel, global, local, eventPointer, eventWaitList);
       if (ErrorIn(status)) { return status; }
-      eventWaitList.push_back(eventKernel);
 
       // Runs the post-processing kernel if needed
       if (!c_no_temp) {
+        eventWaitList.push_back(eventKernel);
         status = PadCopyTransposeMatrix(event_, eventWaitList,
                                         m_ceiled, n_ceiled, m_ceiled, 0, c_temp,
                                         c_one, c_two, c_ld, c_offset, c_buffer,
diff --git a/test/correctness/tester.cc b/test/correctness/tester.cc
index 26c4ba59..85ae7091 100644
--- a/test/correctness/tester.cc
+++ b/test/correctness/tester.cc
@@ -334,7 +334,7 @@ bool TestSimilarity(const T val1, const T val2) {
 
   // Set the allowed error margin for floating-point comparisons
   constexpr auto kErrorMarginRelative = T(0.025);
-  constexpr auto kErrorMarginAbsolute = T(1.0e-4);
+  constexpr auto kErrorMarginAbsolute = T(1.0e-3);
 
   // Shortcut, handles infinities
   if (val1 == val2) {