diff options
Diffstat (limited to 'src/clblast.cpp')
-rw-r--r-- | src/clblast.cpp | 955 |
1 files changed, 502 insertions, 453 deletions
diff --git a/src/clblast.cpp b/src/clblast.cpp index 79c30ca4..4bb4e0b3 100644 --- a/src/clblast.cpp +++ b/src/clblast.cpp @@ -168,13 +168,14 @@ StatusCode Swap(const size_t n, cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_command_queue* queue, cl_event* event) { - auto queue_cpp = Queue(*queue); - auto routine = Xswap<T>(queue_cpp, event); - auto status = routine.SetUp(); - if (status != StatusCode::kSuccess) { return status; } - return routine.DoSwap(n, - Buffer<T>(x_buffer), x_offset, x_inc, - Buffer<T>(y_buffer), y_offset, y_inc); + try { + auto queue_cpp = Queue(*queue); + auto routine = Xswap<T>(queue_cpp, event); + routine.DoSwap(n, + Buffer<T>(x_buffer), x_offset, x_inc, + Buffer<T>(y_buffer), y_offset, y_inc); + return StatusCode::kSuccess; + } catch (...) { return DispatchException(); } } template StatusCode PUBLIC_API Swap<float>(const size_t, cl_mem, const size_t, const size_t, @@ -203,13 +204,14 @@ StatusCode Scal(const size_t n, const T alpha, cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_command_queue* queue, cl_event* event) { - auto queue_cpp = Queue(*queue); - auto routine = Xscal<T>(queue_cpp, event); - auto status = routine.SetUp(); - if (status != StatusCode::kSuccess) { return status; } - return routine.DoScal(n, - alpha, - Buffer<T>(x_buffer), x_offset, x_inc); + try { + auto queue_cpp = Queue(*queue); + auto routine = Xscal<T>(queue_cpp, event); + routine.DoScal(n, + alpha, + Buffer<T>(x_buffer), x_offset, x_inc); + return StatusCode::kSuccess; + } catch (...) { return DispatchException(); } } template StatusCode PUBLIC_API Scal<float>(const size_t, const float, @@ -238,13 +240,14 @@ StatusCode Copy(const size_t n, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_command_queue* queue, cl_event* event) { - auto queue_cpp = Queue(*queue); - auto routine = Xcopy<T>(queue_cpp, event); - auto status = routine.SetUp(); - if (status != StatusCode::kSuccess) { return status; } - return routine.DoCopy(n, - Buffer<T>(x_buffer), x_offset, x_inc, - Buffer<T>(y_buffer), y_offset, y_inc); + try { + auto queue_cpp = Queue(*queue); + auto routine = Xcopy<T>(queue_cpp, event); + routine.DoCopy(n, + Buffer<T>(x_buffer), x_offset, x_inc, + Buffer<T>(y_buffer), y_offset, y_inc); + return StatusCode::kSuccess; + } catch (...) { return DispatchException(); } } template StatusCode PUBLIC_API Copy<float>(const size_t, const cl_mem, const size_t, const size_t, @@ -274,14 +277,15 @@ StatusCode Axpy(const size_t n, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_command_queue* queue, cl_event* event) { - auto queue_cpp = Queue(*queue); - auto routine = Xaxpy<T>(queue_cpp, event); - auto status = routine.SetUp(); - if (status != StatusCode::kSuccess) { return status; } - return routine.DoAxpy(n, - alpha, - Buffer<T>(x_buffer), x_offset, x_inc, - Buffer<T>(y_buffer), y_offset, y_inc); + try { + auto queue_cpp = Queue(*queue); + auto routine = Xaxpy<T>(queue_cpp, event); + routine.DoAxpy(n, + alpha, + Buffer<T>(x_buffer), x_offset, x_inc, + Buffer<T>(y_buffer), y_offset, y_inc); + return StatusCode::kSuccess; + } catch (...) { return DispatchException(); } } template StatusCode PUBLIC_API Axpy<float>(const size_t, const float, @@ -316,14 +320,15 @@ StatusCode Dot(const size_t n, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_command_queue* queue, cl_event* event) { - auto queue_cpp = Queue(*queue); - auto routine = Xdot<T>(queue_cpp, event); - auto status = routine.SetUp(); - if (status != StatusCode::kSuccess) { return status; } - return routine.DoDot(n, - Buffer<T>(dot_buffer), dot_offset, - Buffer<T>(x_buffer), x_offset, x_inc, - Buffer<T>(y_buffer), y_offset, y_inc); + try { + auto queue_cpp = Queue(*queue); + auto routine = Xdot<T>(queue_cpp, event); + routine.DoDot(n, + Buffer<T>(dot_buffer), dot_offset, + Buffer<T>(x_buffer), x_offset, x_inc, + Buffer<T>(y_buffer), y_offset, y_inc); + return StatusCode::kSuccess; + } catch (...) { return DispatchException(); } } template StatusCode PUBLIC_API Dot<float>(const size_t, cl_mem, const size_t, @@ -348,14 +353,15 @@ StatusCode Dotu(const size_t n, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_command_queue* queue, cl_event* event) { - auto queue_cpp = Queue(*queue); - auto routine = Xdotu<T>(queue_cpp, event); - auto status = routine.SetUp(); - if (status != StatusCode::kSuccess) { return status; } - return routine.DoDotu(n, - Buffer<T>(dot_buffer), dot_offset, - Buffer<T>(x_buffer), x_offset, x_inc, - Buffer<T>(y_buffer), y_offset, y_inc); + try { + auto queue_cpp = Queue(*queue); + auto routine = Xdotu<T>(queue_cpp, event); + routine.DoDotu(n, + Buffer<T>(dot_buffer), dot_offset, + Buffer<T>(x_buffer), x_offset, x_inc, + Buffer<T>(y_buffer), y_offset, y_inc); + return StatusCode::kSuccess; + } catch (...) { return DispatchException(); } } template StatusCode PUBLIC_API Dotu<float2>(const size_t, cl_mem, const size_t, @@ -375,14 +381,15 @@ StatusCode Dotc(const size_t n, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_command_queue* queue, cl_event* event) { - auto queue_cpp = Queue(*queue); - auto routine = Xdotc<T>(queue_cpp, event); - auto status = routine.SetUp(); - if (status != StatusCode::kSuccess) { return status; } - return routine.DoDotc(n, - Buffer<T>(dot_buffer), dot_offset, - Buffer<T>(x_buffer), x_offset, x_inc, - Buffer<T>(y_buffer), y_offset, y_inc); + try { + auto queue_cpp = Queue(*queue); + auto routine = Xdotc<T>(queue_cpp, event); + routine.DoDotc(n, + Buffer<T>(dot_buffer), dot_offset, + Buffer<T>(x_buffer), x_offset, x_inc, + Buffer<T>(y_buffer), y_offset, y_inc); + return StatusCode::kSuccess; + } catch (...) { return DispatchException(); } } template StatusCode PUBLIC_API Dotc<float2>(const size_t, cl_mem, const size_t, @@ -401,13 +408,14 @@ StatusCode Nrm2(const size_t n, cl_mem nrm2_buffer, const size_t nrm2_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_command_queue* queue, cl_event* event) { - auto queue_cpp = Queue(*queue); - auto routine = Xnrm2<T>(queue_cpp, event); - auto status = routine.SetUp(); - if (status != StatusCode::kSuccess) { return status; } - return routine.DoNrm2(n, - Buffer<T>(nrm2_buffer), nrm2_offset, - Buffer<T>(x_buffer), x_offset, x_inc); + try { + auto queue_cpp = Queue(*queue); + auto routine = Xnrm2<T>(queue_cpp, event); + routine.DoNrm2(n, + Buffer<T>(nrm2_buffer), nrm2_offset, + Buffer<T>(x_buffer), x_offset, x_inc); + return StatusCode::kSuccess; + } catch (...) { return DispatchException(); } } template StatusCode PUBLIC_API Nrm2<float>(const size_t, cl_mem, const size_t, @@ -436,13 +444,14 @@ StatusCode Asum(const size_t n, cl_mem asum_buffer, const size_t asum_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_command_queue* queue, cl_event* event) { - auto queue_cpp = Queue(*queue); - auto routine = Xasum<T>(queue_cpp, event); - auto status = routine.SetUp(); - if (status != StatusCode::kSuccess) { return status; } - return routine.DoAsum(n, - Buffer<T>(asum_buffer), asum_offset, - Buffer<T>(x_buffer), x_offset, x_inc); + try { + auto queue_cpp = Queue(*queue); + auto routine = Xasum<T>(queue_cpp, event); + routine.DoAsum(n, + Buffer<T>(asum_buffer), asum_offset, + Buffer<T>(x_buffer), x_offset, x_inc); + return StatusCode::kSuccess; + } catch (...) { return DispatchException(); } } template StatusCode PUBLIC_API Asum<float>(const size_t, cl_mem, const size_t, @@ -471,13 +480,14 @@ StatusCode Sum(const size_t n, cl_mem sum_buffer, const size_t sum_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_command_queue* queue, cl_event* event) { - auto queue_cpp = Queue(*queue); - auto routine = Xsum<T>(queue_cpp, event); - auto status = routine.SetUp(); - if (status != StatusCode::kSuccess) { return status; } - return routine.DoSum(n, - Buffer<T>(sum_buffer), sum_offset, - Buffer<T>(x_buffer), x_offset, x_inc); + try { + auto queue_cpp = Queue(*queue); + auto routine = Xsum<T>(queue_cpp, event); + routine.DoSum(n, + Buffer<T>(sum_buffer), sum_offset, + Buffer<T>(x_buffer), x_offset, x_inc); + return StatusCode::kSuccess; + } catch (...) { return DispatchException(); } } template StatusCode PUBLIC_API Sum<float>(const size_t, cl_mem, const size_t, @@ -506,13 +516,14 @@ StatusCode Amax(const size_t n, cl_mem imax_buffer, const size_t imax_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_command_queue* queue, cl_event* event) { - auto queue_cpp = Queue(*queue); - auto routine = Xamax<T>(queue_cpp, event); - auto status = routine.SetUp(); - if (status != StatusCode::kSuccess) { return status; } - return routine.DoAmax(n, - Buffer<unsigned int>(imax_buffer), imax_offset, - Buffer<T>(x_buffer), x_offset, x_inc); + try { + auto queue_cpp = Queue(*queue); + auto routine = Xamax<T>(queue_cpp, event); + routine.DoAmax(n, + Buffer<unsigned int>(imax_buffer), imax_offset, + Buffer<T>(x_buffer), x_offset, x_inc); + return StatusCode::kSuccess; + } catch (...) { return DispatchException(); } } template StatusCode PUBLIC_API Amax<float>(const size_t, cl_mem, const size_t, @@ -541,13 +552,14 @@ StatusCode Max(const size_t n, cl_mem imax_buffer, const size_t imax_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_command_queue* queue, cl_event* event) { - auto queue_cpp = Queue(*queue); - auto routine = Xmax<T>(queue_cpp, event); - auto status = routine.SetUp(); - if (status != StatusCode::kSuccess) { return status; } - return routine.DoMax(n, - Buffer<unsigned int>(imax_buffer), imax_offset, - Buffer<T>(x_buffer), x_offset, x_inc); + try { + auto queue_cpp = Queue(*queue); + auto routine = Xmax<T>(queue_cpp, event); + routine.DoMax(n, + Buffer<unsigned int>(imax_buffer), imax_offset, + Buffer<T>(x_buffer), x_offset, x_inc); + return StatusCode::kSuccess; + } catch (...) { return DispatchException(); } } template StatusCode PUBLIC_API Max<float>(const size_t, cl_mem, const size_t, @@ -576,13 +588,14 @@ StatusCode Min(const size_t n, cl_mem imin_buffer, const size_t imin_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_command_queue* queue, cl_event* event) { - auto queue_cpp = Queue(*queue); - auto routine = Xmin<T>(queue_cpp, event); - auto status = routine.SetUp(); - if (status != StatusCode::kSuccess) { return status; } - return routine.DoMin(n, - Buffer<unsigned int>(imin_buffer), imin_offset, - Buffer<T>(x_buffer), x_offset, x_inc); + try { + auto queue_cpp = Queue(*queue); + auto routine = Xmin<T>(queue_cpp, event); + routine.DoMin(n, + Buffer<unsigned int>(imin_buffer), imin_offset, + Buffer<T>(x_buffer), x_offset, x_inc); + return StatusCode::kSuccess; + } catch (...) { return DispatchException(); } } template StatusCode PUBLIC_API Min<float>(const size_t, cl_mem, const size_t, @@ -619,17 +632,18 @@ StatusCode Gemv(const Layout layout, const Transpose a_transpose, const T beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_command_queue* queue, cl_event* event) { - auto queue_cpp = Queue(*queue); - auto routine = Xgemv<T>(queue_cpp, event); - auto status = routine.SetUp(); - if (status != StatusCode::kSuccess) { return status; } - return routine.DoGemv(layout, a_transpose, - m, n, - alpha, - Buffer<T>(a_buffer), a_offset, a_ld, - Buffer<T>(x_buffer), x_offset, x_inc, - beta, - Buffer<T>(y_buffer), y_offset, y_inc); + try { + auto queue_cpp = Queue(*queue); + auto routine = Xgemv<T>(queue_cpp, event); + routine.DoGemv(layout, a_transpose, + m, n, + alpha, + Buffer<T>(a_buffer), a_offset, a_ld, + Buffer<T>(x_buffer), x_offset, x_inc, + beta, + Buffer<T>(y_buffer), y_offset, y_inc); + return StatusCode::kSuccess; + } catch (...) { return DispatchException(); } } template StatusCode PUBLIC_API Gemv<float>(const Layout, const Transpose, const size_t, const size_t, @@ -682,17 +696,18 @@ StatusCode Gbmv(const Layout layout, const Transpose a_transpose, const T beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_command_queue* queue, cl_event* event) { - auto queue_cpp = Queue(*queue); - auto routine = Xgbmv<T>(queue_cpp, event); - auto status = routine.SetUp(); - if (status != StatusCode::kSuccess) { return status; } - return routine.DoGbmv(layout, a_transpose, - m, n, kl, ku, - alpha, - Buffer<T>(a_buffer), a_offset, a_ld, - Buffer<T>(x_buffer), x_offset, x_inc, - beta, - Buffer<T>(y_buffer), y_offset, y_inc); + try { + auto queue_cpp = Queue(*queue); + auto routine = Xgbmv<T>(queue_cpp, event); + routine.DoGbmv(layout, a_transpose, + m, n, kl, ku, + alpha, + Buffer<T>(a_buffer), a_offset, a_ld, + Buffer<T>(x_buffer), x_offset, x_inc, + beta, + Buffer<T>(y_buffer), y_offset, y_inc); + return StatusCode::kSuccess; + } catch (...) { return DispatchException(); } } template StatusCode PUBLIC_API Gbmv<float>(const Layout, const Transpose, const size_t, const size_t, const size_t, const size_t, @@ -745,17 +760,18 @@ StatusCode Hemv(const Layout layout, const Triangle triangle, const T beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_command_queue* queue, cl_event* event) { - auto queue_cpp = Queue(*queue); - auto routine = Xhemv<T>(queue_cpp, event); - auto status = routine.SetUp(); - if (status != StatusCode::kSuccess) { return status; } - return routine.DoHemv(layout, triangle, - n, - alpha, - Buffer<T>(a_buffer), a_offset, a_ld, - Buffer<T>(x_buffer), x_offset, x_inc, - beta, - Buffer<T>(y_buffer), y_offset, y_inc); + try { + auto queue_cpp = Queue(*queue); + auto routine = Xhemv<T>(queue_cpp, event); + routine.DoHemv(layout, triangle, + n, + alpha, + Buffer<T>(a_buffer), a_offset, a_ld, + Buffer<T>(x_buffer), x_offset, x_inc, + beta, + Buffer<T>(y_buffer), y_offset, y_inc); + return StatusCode::kSuccess; + } catch (...) { return DispatchException(); } } template StatusCode PUBLIC_API Hemv<float2>(const Layout, const Triangle, const size_t, @@ -784,17 +800,18 @@ StatusCode Hbmv(const Layout layout, const Triangle triangle, const T beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_command_queue* queue, cl_event* event) { - auto queue_cpp = Queue(*queue); - auto routine = Xhbmv<T>(queue_cpp, event); - auto status = routine.SetUp(); - if (status != StatusCode::kSuccess) { return status; } - return routine.DoHbmv(layout, triangle, - n, k, - alpha, - Buffer<T>(a_buffer), a_offset, a_ld, - Buffer<T>(x_buffer), x_offset, x_inc, - beta, - Buffer<T>(y_buffer), y_offset, y_inc); + try { + auto queue_cpp = Queue(*queue); + auto routine = Xhbmv<T>(queue_cpp, event); + routine.DoHbmv(layout, triangle, + n, k, + alpha, + Buffer<T>(a_buffer), a_offset, a_ld, + Buffer<T>(x_buffer), x_offset, x_inc, + beta, + Buffer<T>(y_buffer), y_offset, y_inc); + return StatusCode::kSuccess; + } catch (...) { return DispatchException(); } } template StatusCode PUBLIC_API Hbmv<float2>(const Layout, const Triangle, const size_t, const size_t, @@ -823,17 +840,18 @@ StatusCode Hpmv(const Layout layout, const Triangle triangle, const T beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_command_queue* queue, cl_event* event) { - auto queue_cpp = Queue(*queue); - auto routine = Xhpmv<T>(queue_cpp, event); - auto status = routine.SetUp(); - if (status != StatusCode::kSuccess) { return status; } - return routine.DoHpmv(layout, triangle, - n, - alpha, - Buffer<T>(ap_buffer), ap_offset, - Buffer<T>(x_buffer), x_offset, x_inc, - beta, - Buffer<T>(y_buffer), y_offset, y_inc); + try { + auto queue_cpp = Queue(*queue); + auto routine = Xhpmv<T>(queue_cpp, event); + routine.DoHpmv(layout, triangle, + n, + alpha, + Buffer<T>(ap_buffer), ap_offset, + Buffer<T>(x_buffer), x_offset, x_inc, + beta, + Buffer<T>(y_buffer), y_offset, y_inc); + return StatusCode::kSuccess; + } catch (...) { return DispatchException(); } } template StatusCode PUBLIC_API Hpmv<float2>(const Layout, const Triangle, const size_t, @@ -862,17 +880,18 @@ StatusCode Symv(const Layout layout, const Triangle triangle, const T beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_command_queue* queue, cl_event* event) { - auto queue_cpp = Queue(*queue); - auto routine = Xsymv<T>(queue_cpp, event); - auto status = routine.SetUp(); - if (status != StatusCode::kSuccess) { return status; } - return routine.DoSymv(layout, triangle, - n, - alpha, - Buffer<T>(a_buffer), a_offset, a_ld, - Buffer<T>(x_buffer), x_offset, x_inc, - beta, - Buffer<T>(y_buffer), y_offset, y_inc); + try { + auto queue_cpp = Queue(*queue); + auto routine = Xsymv<T>(queue_cpp, event); + routine.DoSymv(layout, triangle, + n, + alpha, + Buffer<T>(a_buffer), a_offset, a_ld, + Buffer<T>(x_buffer), x_offset, x_inc, + beta, + Buffer<T>(y_buffer), y_offset, y_inc); + return StatusCode::kSuccess; + } catch (...) { return DispatchException(); } } template StatusCode PUBLIC_API Symv<float>(const Layout, const Triangle, const size_t, @@ -909,17 +928,18 @@ StatusCode Sbmv(const Layout layout, const Triangle triangle, const T beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_command_queue* queue, cl_event* event) { - auto queue_cpp = Queue(*queue); - auto routine = Xsbmv<T>(queue_cpp, event); - auto status = routine.SetUp(); - if (status != StatusCode::kSuccess) { return status; } - return routine.DoSbmv(layout, triangle, - n, k, - alpha, - Buffer<T>(a_buffer), a_offset, a_ld, - Buffer<T>(x_buffer), x_offset, x_inc, - beta, - Buffer<T>(y_buffer), y_offset, y_inc); + try { + auto queue_cpp = Queue(*queue); + auto routine = Xsbmv<T>(queue_cpp, event); + routine.DoSbmv(layout, triangle, + n, k, + alpha, + Buffer<T>(a_buffer), a_offset, a_ld, + Buffer<T>(x_buffer), x_offset, x_inc, + beta, + Buffer<T>(y_buffer), y_offset, y_inc); + return StatusCode::kSuccess; + } catch (...) { return DispatchException(); } } template StatusCode PUBLIC_API Sbmv<float>(const Layout, const Triangle, const size_t, const size_t, @@ -956,17 +976,18 @@ StatusCode Spmv(const Layout layout, const Triangle triangle, const T beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_command_queue* queue, cl_event* event) { - auto queue_cpp = Queue(*queue); - auto routine = Xspmv<T>(queue_cpp, event); - auto status = routine.SetUp(); - if (status != StatusCode::kSuccess) { return status; } - return routine.DoSpmv(layout, triangle, - n, - alpha, - Buffer<T>(ap_buffer), ap_offset, - Buffer<T>(x_buffer), x_offset, x_inc, - beta, - Buffer<T>(y_buffer), y_offset, y_inc); + try { + auto queue_cpp = Queue(*queue); + auto routine = Xspmv<T>(queue_cpp, event); + routine.DoSpmv(layout, triangle, + n, + alpha, + Buffer<T>(ap_buffer), ap_offset, + Buffer<T>(x_buffer), x_offset, x_inc, + beta, + Buffer<T>(y_buffer), y_offset, y_inc); + return StatusCode::kSuccess; + } catch (...) { return DispatchException(); } } template StatusCode PUBLIC_API Spmv<float>(const Layout, const Triangle, const size_t, @@ -1000,14 +1021,15 @@ StatusCode Trmv(const Layout layout, const Triangle triangle, const Transpose a_ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_command_queue* queue, cl_event* event) { - auto queue_cpp = Queue(*queue); - auto routine = Xtrmv<T>(queue_cpp, event); - auto status = routine.SetUp(); - if (status != StatusCode::kSuccess) { return status; } - return routine.DoTrmv(layout, triangle, a_transpose, diagonal, - n, - Buffer<T>(a_buffer), a_offset, a_ld, - Buffer<T>(x_buffer), x_offset, x_inc); + try { + auto queue_cpp = Queue(*queue); + auto routine = Xtrmv<T>(queue_cpp, event); + routine.DoTrmv(layout, triangle, a_transpose, diagonal, + n, + Buffer<T>(a_buffer), a_offset, a_ld, + Buffer<T>(x_buffer), x_offset, x_inc); + return StatusCode::kSuccess; + } catch (...) { return DispatchException(); } } template StatusCode PUBLIC_API Trmv<float>(const Layout, const Triangle, const Transpose, const Diagonal, const size_t, @@ -1042,14 +1064,15 @@ StatusCode Tbmv(const Layout layout, const Triangle triangle, const Transpose a_ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_command_queue* queue, cl_event* event) { - auto queue_cpp = Queue(*queue); - auto routine = Xtbmv<T>(queue_cpp, event); - auto status = routine.SetUp(); - if (status != StatusCode::kSuccess) { return status; } - return routine.DoTbmv(layout, triangle, a_transpose, diagonal, - n, k, - Buffer<T>(a_buffer), a_offset, a_ld, - Buffer<T>(x_buffer), x_offset, x_inc); + try { + auto queue_cpp = Queue(*queue); + auto routine = Xtbmv<T>(queue_cpp, event); + routine.DoTbmv(layout, triangle, a_transpose, diagonal, + n, k, + Buffer<T>(a_buffer), a_offset, a_ld, + Buffer<T>(x_buffer), x_offset, x_inc); + return StatusCode::kSuccess; + } catch (...) { return DispatchException(); } } template StatusCode PUBLIC_API Tbmv<float>(const Layout, const Triangle, const Transpose, const Diagonal, const size_t, const size_t, @@ -1084,14 +1107,15 @@ StatusCode Tpmv(const Layout layout, const Triangle triangle, const Transpose a_ const cl_mem ap_buffer, const size_t ap_offset, cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_command_queue* queue, cl_event* event) { - auto queue_cpp = Queue(*queue); - auto routine = Xtpmv<T>(queue_cpp, event); - auto status = routine.SetUp(); - if (status != StatusCode::kSuccess) { return status; } - return routine.DoTpmv(layout, triangle, a_transpose, diagonal, - n, - Buffer<T>(ap_buffer), ap_offset, - Buffer<T>(x_buffer), x_offset, x_inc); + try { + auto queue_cpp = Queue(*queue); + auto routine = Xtpmv<T>(queue_cpp, event); + routine.DoTpmv(layout, triangle, a_transpose, diagonal, + n, + Buffer<T>(ap_buffer), ap_offset, + Buffer<T>(x_buffer), x_offset, x_inc); + return StatusCode::kSuccess; + } catch (...) { return DispatchException(); } } template StatusCode PUBLIC_API Tpmv<float>(const Layout, const Triangle, const Transpose, const Diagonal, const size_t, @@ -1218,16 +1242,17 @@ StatusCode Ger(const Layout layout, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_command_queue* queue, cl_event* event) { - auto queue_cpp = Queue(*queue); - auto routine = Xger<T>(queue_cpp, event); - auto status = routine.SetUp(); - if (status != StatusCode::kSuccess) { return status; } - return routine.DoGer(layout, - m, n, - alpha, - Buffer<T>(x_buffer), x_offset, x_inc, - Buffer<T>(y_buffer), y_offset, y_inc, - Buffer<T>(a_buffer), a_offset, a_ld); + try { + auto queue_cpp = Queue(*queue); + auto routine = Xger<T>(queue_cpp, event); + routine.DoGer(layout, + m, n, + alpha, + Buffer<T>(x_buffer), x_offset, x_inc, + Buffer<T>(y_buffer), y_offset, y_inc, + Buffer<T>(a_buffer), a_offset, a_ld); + return StatusCode::kSuccess; + } catch (...) { return DispatchException(); } } template StatusCode PUBLIC_API Ger<float>(const Layout, const size_t, const size_t, @@ -1260,16 +1285,17 @@ StatusCode Geru(const Layout layout, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_command_queue* queue, cl_event* event) { - auto queue_cpp = Queue(*queue); - auto routine = Xgeru<T>(queue_cpp, event); - auto status = routine.SetUp(); - if (status != StatusCode::kSuccess) { return status; } - return routine.DoGeru(layout, - m, n, - alpha, - Buffer<T>(x_buffer), x_offset, x_inc, - Buffer<T>(y_buffer), y_offset, y_inc, - Buffer<T>(a_buffer), a_offset, a_ld); + try { + auto queue_cpp = Queue(*queue); + auto routine = Xgeru<T>(queue_cpp, event); + routine.DoGeru(layout, + m, n, + alpha, + Buffer<T>(x_buffer), x_offset, x_inc, + Buffer<T>(y_buffer), y_offset, y_inc, + Buffer<T>(a_buffer), a_offset, a_ld); + return StatusCode::kSuccess; + } catch (...) { return DispatchException(); } } template StatusCode PUBLIC_API Geru<float2>(const Layout, const size_t, const size_t, @@ -1295,16 +1321,17 @@ StatusCode Gerc(const Layout layout, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_command_queue* queue, cl_event* event) { - auto queue_cpp = Queue(*queue); - auto routine = Xgerc<T>(queue_cpp, event); - auto status = routine.SetUp(); - if (status != StatusCode::kSuccess) { return status; } - return routine.DoGerc(layout, - m, n, - alpha, - Buffer<T>(x_buffer), x_offset, x_inc, - Buffer<T>(y_buffer), y_offset, y_inc, - Buffer<T>(a_buffer), a_offset, a_ld); + try { + auto queue_cpp = Queue(*queue); + auto routine = Xgerc<T>(queue_cpp, event); + routine.DoGerc(layout, + m, n, + alpha, + Buffer<T>(x_buffer), x_offset, x_inc, + Buffer<T>(y_buffer), y_offset, y_inc, + Buffer<T>(a_buffer), a_offset, a_ld); + return StatusCode::kSuccess; + } catch (...) { return DispatchException(); } } template StatusCode PUBLIC_API Gerc<float2>(const Layout, const size_t, const size_t, @@ -1329,15 +1356,16 @@ StatusCode Her(const Layout layout, const Triangle triangle, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_command_queue* queue, cl_event* event) { - auto queue_cpp = Queue(*queue); - auto routine = Xher<std::complex<T>,T>(queue_cpp, event); - auto status = routine.SetUp(); - if (status != StatusCode::kSuccess) { return status; } - return routine.DoHer(layout, triangle, - n, - alpha, - Buffer<std::complex<T>>(x_buffer), x_offset, x_inc, - Buffer<std::complex<T>>(a_buffer), a_offset, a_ld); + try { + auto queue_cpp = Queue(*queue); + auto routine = Xher<std::complex<T>,T>(queue_cpp, event); + routine.DoHer(layout, triangle, + n, + alpha, + Buffer<std::complex<T>>(x_buffer), x_offset, x_inc, + Buffer<std::complex<T>>(a_buffer), a_offset, a_ld); + return StatusCode::kSuccess; + } catch (...) { return DispatchException(); } } template StatusCode PUBLIC_API Her<float>(const Layout, const Triangle, const size_t, @@ -1360,15 +1388,16 @@ StatusCode Hpr(const Layout layout, const Triangle triangle, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem ap_buffer, const size_t ap_offset, cl_command_queue* queue, cl_event* event) { - auto queue_cpp = Queue(*queue); - auto routine = Xhpr<std::complex<T>,T>(queue_cpp, event); - auto status = routine.SetUp(); - if (status != StatusCode::kSuccess) { return status; } - return routine.DoHpr(layout, triangle, - n, - alpha, - Buffer<std::complex<T>>(x_buffer), x_offset, x_inc, - Buffer<std::complex<T>>(ap_buffer), ap_offset); + try { + auto queue_cpp = Queue(*queue); + auto routine = Xhpr<std::complex<T>,T>(queue_cpp, event); + routine.DoHpr(layout, triangle, + n, + alpha, + Buffer<std::complex<T>>(x_buffer), x_offset, x_inc, + Buffer<std::complex<T>>(ap_buffer), ap_offset); + return StatusCode::kSuccess; + } catch (...) { return DispatchException(); } } template StatusCode PUBLIC_API Hpr<float>(const Layout, const Triangle, const size_t, @@ -1392,16 +1421,17 @@ StatusCode Her2(const Layout layout, const Triangle triangle, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_command_queue* queue, cl_event* event) { - auto queue_cpp = Queue(*queue); - auto routine = Xher2<T>(queue_cpp, event); - auto status = routine.SetUp(); - if (status != StatusCode::kSuccess) { return status; } - return routine.DoHer2(layout, triangle, - n, - alpha, - Buffer<T>(x_buffer), x_offset, x_inc, - Buffer<T>(y_buffer), y_offset, y_inc, - Buffer<T>(a_buffer), a_offset, a_ld); + try { + auto queue_cpp = Queue(*queue); + auto routine = Xher2<T>(queue_cpp, event); + routine.DoHer2(layout, triangle, + n, + alpha, + Buffer<T>(x_buffer), x_offset, x_inc, + Buffer<T>(y_buffer), y_offset, y_inc, + Buffer<T>(a_buffer), a_offset, a_ld); + return StatusCode::kSuccess; + } catch (...) { return DispatchException(); } } template StatusCode PUBLIC_API Her2<float2>(const Layout, const Triangle, const size_t, @@ -1427,16 +1457,17 @@ StatusCode Hpr2(const Layout layout, const Triangle triangle, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem ap_buffer, const size_t ap_offset, cl_command_queue* queue, cl_event* event) { - auto queue_cpp = Queue(*queue); - auto routine = Xhpr2<T>(queue_cpp, event); - auto status = routine.SetUp(); - if (status != StatusCode::kSuccess) { return status; } - return routine.DoHpr2(layout, triangle, - n, - alpha, - Buffer<T>(x_buffer), x_offset, x_inc, - Buffer<T>(y_buffer), y_offset, y_inc, - Buffer<T>(ap_buffer), ap_offset); + try { + auto queue_cpp = Queue(*queue); + auto routine = Xhpr2<T>(queue_cpp, event); + routine.DoHpr2(layout, triangle, + n, + alpha, + Buffer<T>(x_buffer), x_offset, x_inc, + Buffer<T>(y_buffer), y_offset, y_inc, + Buffer<T>(ap_buffer), ap_offset); + return StatusCode::kSuccess; + } catch (...) { return DispatchException(); } } template StatusCode PUBLIC_API Hpr2<float2>(const Layout, const Triangle, const size_t, @@ -1461,15 +1492,16 @@ StatusCode Syr(const Layout layout, const Triangle triangle, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_command_queue* queue, cl_event* event) { - auto queue_cpp = Queue(*queue); - auto routine = Xsyr<T>(queue_cpp, event); - auto status = routine.SetUp(); - if (status != StatusCode::kSuccess) { return status; } - return routine.DoSyr(layout, triangle, - n, - alpha, - Buffer<T>(x_buffer), x_offset, x_inc, - Buffer<T>(a_buffer), a_offset, a_ld); + try { + auto queue_cpp = Queue(*queue); + auto routine = Xsyr<T>(queue_cpp, event); + routine.DoSyr(layout, triangle, + n, + alpha, + Buffer<T>(x_buffer), x_offset, x_inc, + Buffer<T>(a_buffer), a_offset, a_ld); + return StatusCode::kSuccess; + } catch (...) { return DispatchException(); } } template StatusCode PUBLIC_API Syr<float>(const Layout, const Triangle, const size_t, @@ -1498,15 +1530,16 @@ StatusCode Spr(const Layout layout, const Triangle triangle, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem ap_buffer, const size_t ap_offset, cl_command_queue* queue, cl_event* event) { - auto queue_cpp = Queue(*queue); - auto routine = Xspr<T>(queue_cpp, event); - auto status = routine.SetUp(); - if (status != StatusCode::kSuccess) { return status; } - return routine.DoSpr(layout, triangle, - n, - alpha, - Buffer<T>(x_buffer), x_offset, x_inc, - Buffer<T>(ap_buffer), ap_offset); + try { + auto queue_cpp = Queue(*queue); + auto routine = Xspr<T>(queue_cpp, event); + routine.DoSpr(layout, triangle, + n, + alpha, + Buffer<T>(x_buffer), x_offset, x_inc, + Buffer<T>(ap_buffer), ap_offset); + return StatusCode::kSuccess; + } catch (...) { return DispatchException(); } } template StatusCode PUBLIC_API Spr<float>(const Layout, const Triangle, const size_t, @@ -1536,16 +1569,17 @@ StatusCode Syr2(const Layout layout, const Triangle triangle, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_command_queue* queue, cl_event* event) { - auto queue_cpp = Queue(*queue); - auto routine = Xsyr2<T>(queue_cpp, event); - auto status = routine.SetUp(); - if (status != StatusCode::kSuccess) { return status; } - return routine.DoSyr2(layout, triangle, - n, - alpha, - Buffer<T>(x_buffer), x_offset, x_inc, - Buffer<T>(y_buffer), y_offset, y_inc, - Buffer<T>(a_buffer), a_offset, a_ld); + try { + auto queue_cpp = Queue(*queue); + auto routine = Xsyr2<T>(queue_cpp, event); + routine.DoSyr2(layout, triangle, + n, + alpha, + Buffer<T>(x_buffer), x_offset, x_inc, + Buffer<T>(y_buffer), y_offset, y_inc, + Buffer<T>(a_buffer), a_offset, a_ld); + return StatusCode::kSuccess; + } catch (...) { return DispatchException(); } } template StatusCode PUBLIC_API Syr2<float>(const Layout, const Triangle, const size_t, @@ -1578,16 +1612,17 @@ StatusCode Spr2(const Layout layout, const Triangle triangle, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem ap_buffer, const size_t ap_offset, cl_command_queue* queue, cl_event* event) { - auto queue_cpp = Queue(*queue); - auto routine = Xspr2<T>(queue_cpp, event); - auto status = routine.SetUp(); - if (status != StatusCode::kSuccess) { return status; } - return routine.DoSpr2(layout, triangle, - n, - alpha, - Buffer<T>(x_buffer), x_offset, x_inc, - Buffer<T>(y_buffer), y_offset, y_inc, - Buffer<T>(ap_buffer), ap_offset); + try { + auto queue_cpp = Queue(*queue); + auto routine = Xspr2<T>(queue_cpp, event); + routine.DoSpr2(layout, triangle, + n, + alpha, + Buffer<T>(x_buffer), x_offset, x_inc, + Buffer<T>(y_buffer), y_offset, y_inc, + Buffer<T>(ap_buffer), ap_offset); + return StatusCode::kSuccess; + } catch (...) { return DispatchException(); } } template StatusCode PUBLIC_API Spr2<float>(const Layout, const Triangle, const size_t, @@ -1625,17 +1660,18 @@ StatusCode Gemm(const Layout layout, const Transpose a_transpose, const Transpos const T beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld, cl_command_queue* queue, cl_event* event) { - auto queue_cpp = Queue(*queue); - auto routine = Xgemm<T>(queue_cpp, event); - auto status = routine.SetUp(); - if (status != StatusCode::kSuccess) { return status; } - return routine.DoGemm(layout, a_transpose, b_transpose, - m, n, k, - alpha, - Buffer<T>(a_buffer), a_offset, a_ld, - Buffer<T>(b_buffer), b_offset, b_ld, - beta, - Buffer<T>(c_buffer), c_offset, c_ld); + try { + auto queue_cpp = Queue(*queue); + auto routine = Xgemm<T>(queue_cpp, event); + routine.DoGemm(layout, a_transpose, b_transpose, + m, n, k, + alpha, + Buffer<T>(a_buffer), a_offset, a_ld, + Buffer<T>(b_buffer), b_offset, b_ld, + beta, + Buffer<T>(c_buffer), c_offset, c_ld); + return StatusCode::kSuccess; + } catch (...) { return DispatchException(); } } template StatusCode PUBLIC_API Gemm<float>(const Layout, const Transpose, const Transpose, const size_t, const size_t, const size_t, @@ -1688,17 +1724,18 @@ StatusCode Symm(const Layout layout, const Side side, const Triangle triangle, const T beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld, cl_command_queue* queue, cl_event* event) { - auto queue_cpp = Queue(*queue); - auto routine = Xsymm<T>(queue_cpp, event); - auto status = routine.SetUp(); - if (status != StatusCode::kSuccess) { return status; } - return routine.DoSymm(layout, side, triangle, - m, n, - alpha, - Buffer<T>(a_buffer), a_offset, a_ld, - Buffer<T>(b_buffer), b_offset, b_ld, - beta, - Buffer<T>(c_buffer), c_offset, c_ld); + try { + auto queue_cpp = Queue(*queue); + auto routine = Xsymm<T>(queue_cpp, event); + routine.DoSymm(layout, side, triangle, + m, n, + alpha, + Buffer<T>(a_buffer), a_offset, a_ld, + Buffer<T>(b_buffer), b_offset, b_ld, + beta, + Buffer<T>(c_buffer), c_offset, c_ld); + return StatusCode::kSuccess; + } catch (...) { return DispatchException(); } } template StatusCode PUBLIC_API Symm<float>(const Layout, const Side, const Triangle, const size_t, const size_t, @@ -1751,17 +1788,18 @@ StatusCode Hemm(const Layout layout, const Side side, const Triangle triangle, const T beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld, cl_command_queue* queue, cl_event* event) { - auto queue_cpp = Queue(*queue); - auto routine = Xhemm<T>(queue_cpp, event); - auto status = routine.SetUp(); - if (status != StatusCode::kSuccess) { return status; } - return routine.DoHemm(layout, side, triangle, - m, n, - alpha, - Buffer<T>(a_buffer), a_offset, a_ld, - Buffer<T>(b_buffer), b_offset, b_ld, - beta, - Buffer<T>(c_buffer), c_offset, c_ld); + try { + auto queue_cpp = Queue(*queue); + auto routine = Xhemm<T>(queue_cpp, event); + routine.DoHemm(layout, side, triangle, + m, n, + alpha, + Buffer<T>(a_buffer), a_offset, a_ld, + Buffer<T>(b_buffer), b_offset, b_ld, + beta, + Buffer<T>(c_buffer), c_offset, c_ld); + return StatusCode::kSuccess; + } catch (...) { return DispatchException(); } } template StatusCode PUBLIC_API Hemm<float2>(const Layout, const Side, const Triangle, const size_t, const size_t, @@ -1789,16 +1827,17 @@ StatusCode Syrk(const Layout layout, const Triangle triangle, const Transpose a_ const T beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld, cl_command_queue* queue, cl_event* event) { - auto queue_cpp = Queue(*queue); - auto routine = Xsyrk<T>(queue_cpp, event); - auto status = routine.SetUp(); - if (status != StatusCode::kSuccess) { return status; } - return routine.DoSyrk(layout, triangle, a_transpose, - n, k, - alpha, - Buffer<T>(a_buffer), a_offset, a_ld, - beta, - Buffer<T>(c_buffer), c_offset, c_ld); + try { + auto queue_cpp = Queue(*queue); + auto routine = Xsyrk<T>(queue_cpp, event); + routine.DoSyrk(layout, triangle, a_transpose, + n, k, + alpha, + Buffer<T>(a_buffer), a_offset, a_ld, + beta, + Buffer<T>(c_buffer), c_offset, c_ld); + return StatusCode::kSuccess; + } catch (...) { return DispatchException(); } } template StatusCode PUBLIC_API Syrk<float>(const Layout, const Triangle, const Transpose, const size_t, const size_t, @@ -1845,16 +1884,17 @@ StatusCode Herk(const Layout layout, const Triangle triangle, const Transpose a_ const T beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld, cl_command_queue* queue, cl_event* event) { - auto queue_cpp = Queue(*queue); - auto routine = Xherk<std::complex<T>,T>(queue_cpp, event); - auto status = routine.SetUp(); - if (status != StatusCode::kSuccess) { return status; } - return routine.DoHerk(layout, triangle, a_transpose, - n, k, - alpha, - Buffer<std::complex<T>>(a_buffer), a_offset, a_ld, - beta, - Buffer<std::complex<T>>(c_buffer), c_offset, c_ld); + try { + auto queue_cpp = Queue(*queue); + auto routine = Xherk<std::complex<T>,T>(queue_cpp, event); + routine.DoHerk(layout, triangle, a_transpose, + n, k, + alpha, + Buffer<std::complex<T>>(a_buffer), a_offset, a_ld, + beta, + Buffer<std::complex<T>>(c_buffer), c_offset, c_ld); + return StatusCode::kSuccess; + } catch (...) { return DispatchException(); } } template StatusCode PUBLIC_API Herk<float>(const Layout, const Triangle, const Transpose, const size_t, const size_t, @@ -1881,17 +1921,18 @@ StatusCode Syr2k(const Layout layout, const Triangle triangle, const Transpose a const T beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld, cl_command_queue* queue, cl_event* event) { - auto queue_cpp = Queue(*queue); - auto routine = Xsyr2k<T>(queue_cpp, event); - auto status = routine.SetUp(); - if (status != StatusCode::kSuccess) { return status; } - return routine.DoSyr2k(layout, triangle, ab_transpose, - n, k, - alpha, - Buffer<T>(a_buffer), a_offset, a_ld, - Buffer<T>(b_buffer), b_offset, b_ld, - beta, - Buffer<T>(c_buffer), c_offset, c_ld); + try { + auto queue_cpp = Queue(*queue); + auto routine = Xsyr2k<T>(queue_cpp, event); + routine.DoSyr2k(layout, triangle, ab_transpose, + n, k, + alpha, + Buffer<T>(a_buffer), a_offset, a_ld, + Buffer<T>(b_buffer), b_offset, b_ld, + beta, + Buffer<T>(c_buffer), c_offset, c_ld); + return StatusCode::kSuccess; + } catch (...) { return DispatchException(); } } template StatusCode PUBLIC_API Syr2k<float>(const Layout, const Triangle, const Transpose, const size_t, const size_t, @@ -1944,17 +1985,18 @@ StatusCode Her2k(const Layout layout, const Triangle triangle, const Transpose a const U beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld, cl_command_queue* queue, cl_event* event) { - auto queue_cpp = Queue(*queue); - auto routine = Xher2k<T,U>(queue_cpp, event); - auto status = routine.SetUp(); - if (status != StatusCode::kSuccess) { return status; } - return routine.DoHer2k(layout, triangle, ab_transpose, - n, k, - alpha, - Buffer<T>(a_buffer), a_offset, a_ld, - Buffer<T>(b_buffer), b_offset, b_ld, - beta, - Buffer<T>(c_buffer), c_offset, c_ld); + try { + auto queue_cpp = Queue(*queue); + auto routine = Xher2k<T,U>(queue_cpp, event); + routine.DoHer2k(layout, triangle, ab_transpose, + n, k, + alpha, + Buffer<T>(a_buffer), a_offset, a_ld, + Buffer<T>(b_buffer), b_offset, b_ld, + beta, + Buffer<T>(c_buffer), c_offset, c_ld); + return StatusCode::kSuccess; + } catch (...) { return DispatchException(); } } template StatusCode PUBLIC_API Her2k<float2,float>(const Layout, const Triangle, const Transpose, const size_t, const size_t, @@ -1981,15 +2023,16 @@ StatusCode Trmm(const Layout layout, const Side side, const Triangle triangle, c const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem b_buffer, const size_t b_offset, const size_t b_ld, cl_command_queue* queue, cl_event* event) { - auto queue_cpp = Queue(*queue); - auto routine = Xtrmm<T>(queue_cpp, event); - auto status = routine.SetUp(); - if (status != StatusCode::kSuccess) { return status; } - return routine.DoTrmm(layout, side, triangle, a_transpose, diagonal, - m, n, - alpha, - Buffer<T>(a_buffer), a_offset, a_ld, - Buffer<T>(b_buffer), b_offset, b_ld); + try { + auto queue_cpp = Queue(*queue); + auto routine = Xtrmm<T>(queue_cpp, event); + routine.DoTrmm(layout, side, triangle, a_transpose, diagonal, + m, n, + alpha, + Buffer<T>(a_buffer), a_offset, a_ld, + Buffer<T>(b_buffer), b_offset, b_ld); + return StatusCode::kSuccess; + } catch (...) { return DispatchException(); } } template StatusCode PUBLIC_API Trmm<float>(const Layout, const Side, const Triangle, const Transpose, const Diagonal, const size_t, const size_t, @@ -2075,15 +2118,16 @@ StatusCode Omatcopy(const Layout layout, const Transpose a_transpose, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem b_buffer, const size_t b_offset, const size_t b_ld, cl_command_queue* queue, cl_event* event) { - auto queue_cpp = Queue(*queue); - auto routine = Xomatcopy<T>(queue_cpp, event); - auto status = routine.SetUp(); - if (status != StatusCode::kSuccess) { return status; } - return routine.DoOmatcopy(layout, a_transpose, - m, n, - alpha, - Buffer<T>(a_buffer), a_offset, a_ld, - Buffer<T>(b_buffer), b_offset, b_ld); + try { + auto queue_cpp = Queue(*queue); + auto routine = Xomatcopy<T>(queue_cpp, event); + routine.DoOmatcopy(layout, a_transpose, + m, n, + alpha, + Buffer<T>(a_buffer), a_offset, a_ld, + Buffer<T>(b_buffer), b_offset, b_ld); + return StatusCode::kSuccess; + } catch (...) { return DispatchException(); } } template StatusCode PUBLIC_API Omatcopy<float>(const Layout, const Transpose, const size_t, const size_t, @@ -2119,7 +2163,12 @@ template StatusCode PUBLIC_API Omatcopy<half>(const Layout, const Transpose, // ================================================================================================= // Clears the cache of stored binaries -StatusCode ClearCache() { return CacheClearAll(); } +StatusCode ClearCache() { + try { + CacheClearAll(); + } catch (...) { return DispatchException(); } + return StatusCode::kSuccess; +} // Fills the cache with all binaries for a specific device // TODO: Add half-precision FP16 set-up calls @@ -2132,59 +2181,59 @@ StatusCode FillCache(const cl_device_id device) { auto queue = Queue(context, device_cpp); // Runs all the level 1 set-up functions - Xswap<float>(queue, nullptr).SetUp(); Xswap<double>(queue, nullptr).SetUp(); Xswap<float2>(queue, nullptr).SetUp(); Xswap<double2>(queue, nullptr).SetUp(); - Xswap<float>(queue, nullptr).SetUp(); Xswap<double>(queue, nullptr).SetUp(); Xswap<float2>(queue, nullptr).SetUp(); Xswap<double2>(queue, nullptr).SetUp(); - Xscal<float>(queue, nullptr).SetUp(); Xscal<double>(queue, nullptr).SetUp(); Xscal<float2>(queue, nullptr).SetUp(); Xscal<double2>(queue, nullptr).SetUp(); - Xcopy<float>(queue, nullptr).SetUp(); Xcopy<double>(queue, nullptr).SetUp(); Xcopy<float2>(queue, nullptr).SetUp(); Xcopy<double2>(queue, nullptr).SetUp(); - Xaxpy<float>(queue, nullptr).SetUp(); Xaxpy<double>(queue, nullptr).SetUp(); Xaxpy<float2>(queue, nullptr).SetUp(); Xaxpy<double2>(queue, nullptr).SetUp(); - Xdot<float>(queue, nullptr).SetUp(); Xdot<double>(queue, nullptr).SetUp(); - Xdotu<float2>(queue, nullptr).SetUp(); Xdotu<double2>(queue, nullptr).SetUp(); - Xdotc<float2>(queue, nullptr).SetUp(); Xdotc<double2>(queue, nullptr).SetUp(); - Xnrm2<float>(queue, nullptr).SetUp(); Xnrm2<double>(queue, nullptr).SetUp(); Xnrm2<float2>(queue, nullptr).SetUp(); Xnrm2<double2>(queue, nullptr).SetUp(); - Xasum<float>(queue, nullptr).SetUp(); Xasum<double>(queue, nullptr).SetUp(); Xasum<float2>(queue, nullptr).SetUp(); Xasum<double2>(queue, nullptr).SetUp(); - Xsum<float>(queue, nullptr).SetUp(); Xsum<double>(queue, nullptr).SetUp(); Xsum<float2>(queue, nullptr).SetUp(); Xsum<double2>(queue, nullptr).SetUp(); - Xamax<float>(queue, nullptr).SetUp(); Xamax<double>(queue, nullptr).SetUp(); Xamax<float2>(queue, nullptr).SetUp(); Xamax<double2>(queue, nullptr).SetUp(); - Xmax<float>(queue, nullptr).SetUp(); Xmax<double>(queue, nullptr).SetUp(); Xmax<float2>(queue, nullptr).SetUp(); Xmax<double2>(queue, nullptr).SetUp(); - Xmin<float>(queue, nullptr).SetUp(); Xmin<double>(queue, nullptr).SetUp(); Xmin<float2>(queue, nullptr).SetUp(); Xmin<double2>(queue, nullptr).SetUp(); + Xswap<float>(queue, nullptr); Xswap<double>(queue, nullptr); Xswap<float2>(queue, nullptr); Xswap<double2>(queue, nullptr); + Xswap<float>(queue, nullptr); Xswap<double>(queue, nullptr); Xswap<float2>(queue, nullptr); Xswap<double2>(queue, nullptr); + Xscal<float>(queue, nullptr); Xscal<double>(queue, nullptr); Xscal<float2>(queue, nullptr); Xscal<double2>(queue, nullptr); + Xcopy<float>(queue, nullptr); Xcopy<double>(queue, nullptr); Xcopy<float2>(queue, nullptr); Xcopy<double2>(queue, nullptr); + Xaxpy<float>(queue, nullptr); Xaxpy<double>(queue, nullptr); Xaxpy<float2>(queue, nullptr); Xaxpy<double2>(queue, nullptr); + Xdot<float>(queue, nullptr); Xdot<double>(queue, nullptr); + Xdotu<float2>(queue, nullptr); Xdotu<double2>(queue, nullptr); + Xdotc<float2>(queue, nullptr); Xdotc<double2>(queue, nullptr); + Xnrm2<float>(queue, nullptr); Xnrm2<double>(queue, nullptr); Xnrm2<float2>(queue, nullptr); Xnrm2<double2>(queue, nullptr); + Xasum<float>(queue, nullptr); Xasum<double>(queue, nullptr); Xasum<float2>(queue, nullptr); Xasum<double2>(queue, nullptr); + Xsum<float>(queue, nullptr); Xsum<double>(queue, nullptr); Xsum<float2>(queue, nullptr); Xsum<double2>(queue, nullptr); + Xamax<float>(queue, nullptr); Xamax<double>(queue, nullptr); Xamax<float2>(queue, nullptr); Xamax<double2>(queue, nullptr); + Xmax<float>(queue, nullptr); Xmax<double>(queue, nullptr); Xmax<float2>(queue, nullptr); Xmax<double2>(queue, nullptr); + Xmin<float>(queue, nullptr); Xmin<double>(queue, nullptr); Xmin<float2>(queue, nullptr); Xmin<double2>(queue, nullptr); // Runs all the level 2 set-up functions - Xgemv<float>(queue, nullptr).SetUp(); Xgemv<double>(queue, nullptr).SetUp(); Xgemv<float2>(queue, nullptr).SetUp(); Xgemv<double2>(queue, nullptr).SetUp(); - Xgbmv<float>(queue, nullptr).SetUp(); Xgbmv<double>(queue, nullptr).SetUp(); Xgbmv<float2>(queue, nullptr).SetUp(); Xgbmv<double2>(queue, nullptr).SetUp(); - Xhemv<float2>(queue, nullptr).SetUp(); Xhemv<double2>(queue, nullptr).SetUp(); - Xhbmv<float2>(queue, nullptr).SetUp(); Xhbmv<double2>(queue, nullptr).SetUp(); - Xhpmv<float2>(queue, nullptr).SetUp(); Xhpmv<double2>(queue, nullptr).SetUp(); - Xsymv<float>(queue, nullptr).SetUp(); Xsymv<double>(queue, nullptr).SetUp(); - Xsbmv<float>(queue, nullptr).SetUp(); Xsbmv<double>(queue, nullptr).SetUp(); - Xspmv<float>(queue, nullptr).SetUp(); Xspmv<double>(queue, nullptr).SetUp(); - Xtrmv<float>(queue, nullptr).SetUp(); Xtrmv<double>(queue, nullptr).SetUp(); Xtrmv<float2>(queue, nullptr).SetUp(); Xtrmv<double2>(queue, nullptr).SetUp(); - Xtbmv<float>(queue, nullptr).SetUp(); Xtbmv<double>(queue, nullptr).SetUp(); Xtbmv<float2>(queue, nullptr).SetUp(); Xtbmv<double2>(queue, nullptr).SetUp(); - Xtpmv<float>(queue, nullptr).SetUp(); Xtpmv<double>(queue, nullptr).SetUp(); Xtpmv<float2>(queue, nullptr).SetUp(); Xtpmv<double2>(queue, nullptr).SetUp(); - Xger<float>(queue, nullptr).SetUp(); Xger<double>(queue, nullptr).SetUp(); - Xgeru<float2>(queue, nullptr).SetUp(); Xgeru<double2>(queue, nullptr).SetUp(); - Xgerc<float2>(queue, nullptr).SetUp(); Xgerc<double2>(queue, nullptr).SetUp(); - Xher<float2,float>(queue, nullptr).SetUp(); Xher<double2,double>(queue, nullptr).SetUp(); - Xhpr<float2,float>(queue, nullptr).SetUp(); Xhpr<double2,double>(queue, nullptr).SetUp(); - Xher2<float2>(queue, nullptr).SetUp(); Xher2<double2>(queue, nullptr).SetUp(); - Xhpr2<float2>(queue, nullptr).SetUp(); Xhpr2<double2>(queue, nullptr).SetUp(); - Xsyr<float>(queue, nullptr).SetUp(); Xsyr<double>(queue, nullptr).SetUp(); - Xspr<float>(queue, nullptr).SetUp(); Xspr<double>(queue, nullptr).SetUp(); - Xsyr2<float>(queue, nullptr).SetUp(); Xsyr2<double>(queue, nullptr).SetUp(); - Xspr2<float>(queue, nullptr).SetUp(); Xspr2<double>(queue, nullptr).SetUp(); + Xgemv<float>(queue, nullptr); Xgemv<double>(queue, nullptr); Xgemv<float2>(queue, nullptr); Xgemv<double2>(queue, nullptr); + Xgbmv<float>(queue, nullptr); Xgbmv<double>(queue, nullptr); Xgbmv<float2>(queue, nullptr); Xgbmv<double2>(queue, nullptr); + Xhemv<float2>(queue, nullptr); Xhemv<double2>(queue, nullptr); + Xhbmv<float2>(queue, nullptr); Xhbmv<double2>(queue, nullptr); + Xhpmv<float2>(queue, nullptr); Xhpmv<double2>(queue, nullptr); + Xsymv<float>(queue, nullptr); Xsymv<double>(queue, nullptr); + Xsbmv<float>(queue, nullptr); Xsbmv<double>(queue, nullptr); + Xspmv<float>(queue, nullptr); Xspmv<double>(queue, nullptr); + Xtrmv<float>(queue, nullptr); Xtrmv<double>(queue, nullptr); Xtrmv<float2>(queue, nullptr); Xtrmv<double2>(queue, nullptr); + Xtbmv<float>(queue, nullptr); Xtbmv<double>(queue, nullptr); Xtbmv<float2>(queue, nullptr); Xtbmv<double2>(queue, nullptr); + Xtpmv<float>(queue, nullptr); Xtpmv<double>(queue, nullptr); Xtpmv<float2>(queue, nullptr); Xtpmv<double2>(queue, nullptr); + Xger<float>(queue, nullptr); Xger<double>(queue, nullptr); + Xgeru<float2>(queue, nullptr); Xgeru<double2>(queue, nullptr); + Xgerc<float2>(queue, nullptr); Xgerc<double2>(queue, nullptr); + Xher<float2,float>(queue, nullptr); Xher<double2,double>(queue, nullptr); + Xhpr<float2,float>(queue, nullptr); Xhpr<double2,double>(queue, nullptr); + Xher2<float2>(queue, nullptr); Xher2<double2>(queue, nullptr); + Xhpr2<float2>(queue, nullptr); Xhpr2<double2>(queue, nullptr); + Xsyr<float>(queue, nullptr); Xsyr<double>(queue, nullptr); + Xspr<float>(queue, nullptr); Xspr<double>(queue, nullptr); + Xsyr2<float>(queue, nullptr); Xsyr2<double>(queue, nullptr); + Xspr2<float>(queue, nullptr); Xspr2<double>(queue, nullptr); // Runs all the level 3 set-up functions - Xgemm<float>(queue, nullptr).SetUp(); Xgemm<double>(queue, nullptr).SetUp(); Xgemm<float2>(queue, nullptr).SetUp(); Xgemm<double2>(queue, nullptr).SetUp(); - Xsymm<float>(queue, nullptr).SetUp(); Xsymm<double>(queue, nullptr).SetUp(); Xsymm<float2>(queue, nullptr).SetUp(); Xsymm<double2>(queue, nullptr).SetUp(); - Xhemm<float2>(queue, nullptr).SetUp(); Xhemm<double2>(queue, nullptr).SetUp(); - Xsyrk<float>(queue, nullptr).SetUp(); Xsyrk<double>(queue, nullptr).SetUp(); Xsyrk<float2>(queue, nullptr).SetUp(); Xsyrk<double2>(queue, nullptr).SetUp(); - Xherk<float2,float>(queue, nullptr).SetUp(); Xherk<double2,double>(queue, nullptr).SetUp(); - Xsyr2k<float>(queue, nullptr).SetUp(); Xsyr2k<double>(queue, nullptr).SetUp(); Xsyr2k<float2>(queue, nullptr).SetUp(); Xsyr2k<double2>(queue, nullptr).SetUp(); - Xher2k<float2,float>(queue, nullptr).SetUp(); Xher2k<double2,double>(queue, nullptr).SetUp(); - Xtrmm<float>(queue, nullptr).SetUp(); Xtrmm<double>(queue, nullptr).SetUp(); Xtrmm<float2>(queue, nullptr).SetUp(); Xtrmm<double2>(queue, nullptr).SetUp(); + Xgemm<float>(queue, nullptr); Xgemm<double>(queue, nullptr); Xgemm<float2>(queue, nullptr); Xgemm<double2>(queue, nullptr); + Xsymm<float>(queue, nullptr); Xsymm<double>(queue, nullptr); Xsymm<float2>(queue, nullptr); Xsymm<double2>(queue, nullptr); + Xhemm<float2>(queue, nullptr); Xhemm<double2>(queue, nullptr); + Xsyrk<float>(queue, nullptr); Xsyrk<double>(queue, nullptr); Xsyrk<float2>(queue, nullptr); Xsyrk<double2>(queue, nullptr); + Xherk<float2,float>(queue, nullptr); Xherk<double2,double>(queue, nullptr); + Xsyr2k<float>(queue, nullptr); Xsyr2k<double>(queue, nullptr); Xsyr2k<float2>(queue, nullptr); Xsyr2k<double2>(queue, nullptr); + Xher2k<float2,float>(queue, nullptr); Xher2k<double2,double>(queue, nullptr); + Xtrmm<float>(queue, nullptr); Xtrmm<double>(queue, nullptr); Xtrmm<float2>(queue, nullptr); Xtrmm<double2>(queue, nullptr); // Runs all the level 3 set-up functions - Xomatcopy<float>(queue, nullptr).SetUp(); Xomatcopy<double>(queue, nullptr).SetUp(); Xomatcopy<float2>(queue, nullptr).SetUp(); Xomatcopy<double2>(queue, nullptr).SetUp(); + Xomatcopy<float>(queue, nullptr); Xomatcopy<double>(queue, nullptr); Xomatcopy<float2>(queue, nullptr); Xomatcopy<double2>(queue, nullptr); - } catch (...) { return StatusCode::kBuildProgramFailure; } + } catch (...) { return DispatchException(); } return StatusCode::kSuccess; } |