From b98af44fcf89b9946e1de438b1f5527e6bf28905 Mon Sep 17 00:00:00 2001 From: Ivan Shapovalov Date: Sat, 22 Oct 2016 05:14:19 +0300 Subject: treewide: use C++ exceptions properly Since the codebase is designed around proper C++ idioms such as RAII, it makes sense to only use C++ exceptions internally instead of mixing exceptions and error codes. The exceptions are now caught at top level to preserve compatibility with the existing error code-based API. Note that we deliberately do not catch C++ runtime errors (such as `std::bad_alloc`) nor logic errors (aka failed assertions) because no actual handling can ever happen for such errors. However, in the C interface we do catch _all_ exceptions (...) and convert them into a wild-card error code. --- src/routines/level2/xger.cpp | 77 +++++++++++++++++++------------------------- 1 file changed, 34 insertions(+), 43 deletions(-) (limited to 'src/routines/level2/xger.cpp') diff --git a/src/routines/level2/xger.cpp b/src/routines/level2/xger.cpp index 29cffe0c..f22131bb 100644 --- a/src/routines/level2/xger.cpp +++ b/src/routines/level2/xger.cpp @@ -33,15 +33,15 @@ Xger::Xger(Queue &queue, EventPointer event, const std::string &name): // The main routine template -StatusCode Xger::DoGer(const Layout layout, - const size_t m, const size_t n, - const T alpha, - const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, - const Buffer &y_buffer, const size_t y_offset, const size_t y_inc, - const Buffer &a_buffer, const size_t a_offset, const size_t a_ld) { +void Xger::DoGer(const Layout layout, + const size_t m, const size_t n, + const T alpha, + const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, + const Buffer &y_buffer, const size_t y_offset, const size_t y_inc, + const Buffer &a_buffer, const size_t a_offset, const size_t a_ld) { // Makes sure all dimensions are larger than zero - if (m == 0 || n == 0) { return StatusCode::kInvalidDimension; } + if (m == 0 || n == 0) { throw 
BLASError(StatusCode::kInvalidDimension); } // Computes whether or not the matrix has an alternative layout (row or column-major). const auto a_is_rowmajor = (layout == Layout::kRowMajor); @@ -49,44 +49,35 @@ StatusCode Xger::DoGer(const Layout layout, const auto a_two = (a_is_rowmajor) ? m : n; // Tests the matrix and the vectors for validity - auto status = TestMatrixA(a_one, a_two, a_buffer, a_offset, a_ld); - if (ErrorIn(status)) { return status; } - status = TestVectorX(m, x_buffer, x_offset, x_inc); - if (ErrorIn(status)) { return status; } - status = TestVectorY(n, y_buffer, y_offset, y_inc); - if (ErrorIn(status)) { return status; } + TestMatrixA(a_one, a_two, a_buffer, a_offset, a_ld); + TestVectorX(m, x_buffer, x_offset, x_inc); + TestVectorY(n, y_buffer, y_offset, y_inc); // Retrieves the kernel from the compiled binary - try { - const auto program = GetProgramFromCache(context_, PrecisionValue(), routine_name_); - auto kernel = Kernel(program, "Xger"); - - // Sets the kernel arguments - kernel.SetArgument(0, static_cast(a_one)); - kernel.SetArgument(1, static_cast(a_two)); - kernel.SetArgument(2, GetRealArg(alpha)); - kernel.SetArgument(3, x_buffer()); - kernel.SetArgument(4, static_cast(x_offset)); - kernel.SetArgument(5, static_cast(x_inc)); - kernel.SetArgument(6, y_buffer()); - kernel.SetArgument(7, static_cast(y_offset)); - kernel.SetArgument(8, static_cast(y_inc)); - kernel.SetArgument(9, a_buffer()); - kernel.SetArgument(10, static_cast(a_offset)); - kernel.SetArgument(11, static_cast(a_ld)); - kernel.SetArgument(12, static_cast(a_is_rowmajor)); - - // Launches the kernel - auto a_one_ceiled = Ceil(CeilDiv(a_one, db_["WPT"]), db_["WGS1"]); - auto a_two_ceiled = Ceil(CeilDiv(a_two, db_["WPT"]), db_["WGS2"]); - auto global = std::vector{a_one_ceiled, a_two_ceiled}; - auto local = std::vector{db_["WGS1"], db_["WGS2"]}; - status = RunKernel(kernel, queue_, device_, global, local, event_); - if (ErrorIn(status)) { return status; } - - // Succesfully 
finished the computation - return StatusCode::kSuccess; - } catch (...) { return StatusCode::kInvalidKernel; } + const auto program = GetProgramFromCache(context_, PrecisionValue(), routine_name_); + auto kernel = Kernel(program, "Xger"); + + // Sets the kernel arguments + kernel.SetArgument(0, static_cast(a_one)); + kernel.SetArgument(1, static_cast(a_two)); + kernel.SetArgument(2, GetRealArg(alpha)); + kernel.SetArgument(3, x_buffer()); + kernel.SetArgument(4, static_cast(x_offset)); + kernel.SetArgument(5, static_cast(x_inc)); + kernel.SetArgument(6, y_buffer()); + kernel.SetArgument(7, static_cast(y_offset)); + kernel.SetArgument(8, static_cast(y_inc)); + kernel.SetArgument(9, a_buffer()); + kernel.SetArgument(10, static_cast(a_offset)); + kernel.SetArgument(11, static_cast(a_ld)); + kernel.SetArgument(12, static_cast(a_is_rowmajor)); + + // Launches the kernel + auto a_one_ceiled = Ceil(CeilDiv(a_one, db_["WPT"]), db_["WGS1"]); + auto a_two_ceiled = Ceil(CeilDiv(a_two, db_["WPT"]), db_["WGS2"]); + auto global = std::vector{a_one_ceiled, a_two_ceiled}; + auto local = std::vector{db_["WGS1"], db_["WGS2"]}; + RunKernel(kernel, queue_, device_, global, local, event_); } // ================================================================================================= -- cgit v1.2.3 From 56f300607b1d0b81ab3269894fda5a066c46cdeb Mon Sep 17 00:00:00 2001 From: Ivan Shapovalov Date: Tue, 18 Oct 2016 04:53:06 +0300 Subject: Routine: get rid of ::SetUp() Since we now use C++ exceptions inside the implementation (and exceptions can be thrown from constructors), there is no need for a separate Routine::SetUp() function. To make this possible, we also change the way the kernel source string is constructed. The kernel-specific source code is now passed to the Routine ctor via an initializer_list of C strings to avoid unnecessary data copying while also working around C1091 of MSVC 2013.
--- scripts/generator/generator/cpp.py | 1 - src/clblast.cpp | 134 +++++++++++++------------------------ src/routine.cpp | 39 +++++------ src/routine.hpp | 10 ++- src/routines/level1/xamax.cpp | 5 +- src/routines/level1/xasum.cpp | 5 +- src/routines/level1/xaxpy.cpp | 5 +- src/routines/level1/xcopy.cpp | 5 +- src/routines/level1/xdot.cpp | 5 +- src/routines/level1/xnrm2.cpp | 5 +- src/routines/level1/xscal.cpp | 5 +- src/routines/level1/xswap.cpp | 5 +- src/routines/level2/xgemv.cpp | 5 +- src/routines/level2/xger.cpp | 5 +- src/routines/level2/xher.cpp | 5 +- src/routines/level2/xher2.cpp | 5 +- src/routines/level3/xgemm.cpp | 9 +-- src/routines/level3/xher2k.cpp | 5 +- src/routines/level3/xherk.cpp | 5 +- src/routines/level3/xsyr2k.cpp | 5 +- src/routines/level3/xsyrk.cpp | 5 +- src/routines/levelx/xomatcopy.cpp | 5 +- 22 files changed, 104 insertions(+), 174 deletions(-) (limited to 'src/routines/level2/xger.cpp') diff --git a/scripts/generator/generator/cpp.py b/scripts/generator/generator/cpp.py index 507b0d28..a0d43667 100644 --- a/scripts/generator/generator/cpp.py +++ b/scripts/generator/generator/cpp.py @@ -52,7 +52,6 @@ def clblast_cc(routine): result += " try {" + NL result += " auto queue_cpp = Queue(*queue);" + NL result += " auto routine = X" + routine.name + "<" + routine.template.template + ">(queue_cpp, event);" + NL - result += " routine.SetUp();" + NL result += " routine.Do" + routine.name.capitalize() + "(" result += ("," + NL + indent1).join([a for a in routine.arguments_clcudaapi()]) result += ");" + NL diff --git a/src/clblast.cpp b/src/clblast.cpp index 8e7b042f..4bb4e0b3 100644 --- a/src/clblast.cpp +++ b/src/clblast.cpp @@ -171,7 +171,6 @@ StatusCode Swap(const size_t n, try { auto queue_cpp = Queue(*queue); auto routine = Xswap(queue_cpp, event); - routine.SetUp(); routine.DoSwap(n, Buffer(x_buffer), x_offset, x_inc, Buffer(y_buffer), y_offset, y_inc); @@ -208,7 +207,6 @@ StatusCode Scal(const size_t n, try { auto queue_cpp = 
Queue(*queue); auto routine = Xscal(queue_cpp, event); - routine.SetUp(); routine.DoScal(n, alpha, Buffer(x_buffer), x_offset, x_inc); @@ -245,7 +243,6 @@ StatusCode Copy(const size_t n, try { auto queue_cpp = Queue(*queue); auto routine = Xcopy(queue_cpp, event); - routine.SetUp(); routine.DoCopy(n, Buffer(x_buffer), x_offset, x_inc, Buffer(y_buffer), y_offset, y_inc); @@ -283,7 +280,6 @@ StatusCode Axpy(const size_t n, try { auto queue_cpp = Queue(*queue); auto routine = Xaxpy(queue_cpp, event); - routine.SetUp(); routine.DoAxpy(n, alpha, Buffer(x_buffer), x_offset, x_inc, @@ -327,7 +323,6 @@ StatusCode Dot(const size_t n, try { auto queue_cpp = Queue(*queue); auto routine = Xdot(queue_cpp, event); - routine.SetUp(); routine.DoDot(n, Buffer(dot_buffer), dot_offset, Buffer(x_buffer), x_offset, x_inc, @@ -361,7 +356,6 @@ StatusCode Dotu(const size_t n, try { auto queue_cpp = Queue(*queue); auto routine = Xdotu(queue_cpp, event); - routine.SetUp(); routine.DoDotu(n, Buffer(dot_buffer), dot_offset, Buffer(x_buffer), x_offset, x_inc, @@ -390,7 +384,6 @@ StatusCode Dotc(const size_t n, try { auto queue_cpp = Queue(*queue); auto routine = Xdotc(queue_cpp, event); - routine.SetUp(); routine.DoDotc(n, Buffer(dot_buffer), dot_offset, Buffer(x_buffer), x_offset, x_inc, @@ -418,7 +411,6 @@ StatusCode Nrm2(const size_t n, try { auto queue_cpp = Queue(*queue); auto routine = Xnrm2(queue_cpp, event); - routine.SetUp(); routine.DoNrm2(n, Buffer(nrm2_buffer), nrm2_offset, Buffer(x_buffer), x_offset, x_inc); @@ -455,7 +447,6 @@ StatusCode Asum(const size_t n, try { auto queue_cpp = Queue(*queue); auto routine = Xasum(queue_cpp, event); - routine.SetUp(); routine.DoAsum(n, Buffer(asum_buffer), asum_offset, Buffer(x_buffer), x_offset, x_inc); @@ -492,7 +483,6 @@ StatusCode Sum(const size_t n, try { auto queue_cpp = Queue(*queue); auto routine = Xsum(queue_cpp, event); - routine.SetUp(); routine.DoSum(n, Buffer(sum_buffer), sum_offset, Buffer(x_buffer), x_offset, x_inc); @@ -529,7 
+519,6 @@ StatusCode Amax(const size_t n, try { auto queue_cpp = Queue(*queue); auto routine = Xamax(queue_cpp, event); - routine.SetUp(); routine.DoAmax(n, Buffer(imax_buffer), imax_offset, Buffer(x_buffer), x_offset, x_inc); @@ -566,7 +555,6 @@ StatusCode Max(const size_t n, try { auto queue_cpp = Queue(*queue); auto routine = Xmax(queue_cpp, event); - routine.SetUp(); routine.DoMax(n, Buffer(imax_buffer), imax_offset, Buffer(x_buffer), x_offset, x_inc); @@ -603,7 +591,6 @@ StatusCode Min(const size_t n, try { auto queue_cpp = Queue(*queue); auto routine = Xmin(queue_cpp, event); - routine.SetUp(); routine.DoMin(n, Buffer(imin_buffer), imin_offset, Buffer(x_buffer), x_offset, x_inc); @@ -648,7 +635,6 @@ StatusCode Gemv(const Layout layout, const Transpose a_transpose, try { auto queue_cpp = Queue(*queue); auto routine = Xgemv(queue_cpp, event); - routine.SetUp(); routine.DoGemv(layout, a_transpose, m, n, alpha, @@ -713,7 +699,6 @@ StatusCode Gbmv(const Layout layout, const Transpose a_transpose, try { auto queue_cpp = Queue(*queue); auto routine = Xgbmv(queue_cpp, event); - routine.SetUp(); routine.DoGbmv(layout, a_transpose, m, n, kl, ku, alpha, @@ -778,7 +763,6 @@ StatusCode Hemv(const Layout layout, const Triangle triangle, try { auto queue_cpp = Queue(*queue); auto routine = Xhemv(queue_cpp, event); - routine.SetUp(); routine.DoHemv(layout, triangle, n, alpha, @@ -819,7 +803,6 @@ StatusCode Hbmv(const Layout layout, const Triangle triangle, try { auto queue_cpp = Queue(*queue); auto routine = Xhbmv(queue_cpp, event); - routine.SetUp(); routine.DoHbmv(layout, triangle, n, k, alpha, @@ -860,7 +843,6 @@ StatusCode Hpmv(const Layout layout, const Triangle triangle, try { auto queue_cpp = Queue(*queue); auto routine = Xhpmv(queue_cpp, event); - routine.SetUp(); routine.DoHpmv(layout, triangle, n, alpha, @@ -901,7 +883,6 @@ StatusCode Symv(const Layout layout, const Triangle triangle, try { auto queue_cpp = Queue(*queue); auto routine = Xsymv(queue_cpp, event); - 
routine.SetUp(); routine.DoSymv(layout, triangle, n, alpha, @@ -950,7 +931,6 @@ StatusCode Sbmv(const Layout layout, const Triangle triangle, try { auto queue_cpp = Queue(*queue); auto routine = Xsbmv(queue_cpp, event); - routine.SetUp(); routine.DoSbmv(layout, triangle, n, k, alpha, @@ -999,7 +979,6 @@ StatusCode Spmv(const Layout layout, const Triangle triangle, try { auto queue_cpp = Queue(*queue); auto routine = Xspmv(queue_cpp, event); - routine.SetUp(); routine.DoSpmv(layout, triangle, n, alpha, @@ -1045,7 +1024,6 @@ StatusCode Trmv(const Layout layout, const Triangle triangle, const Transpose a_ try { auto queue_cpp = Queue(*queue); auto routine = Xtrmv(queue_cpp, event); - routine.SetUp(); routine.DoTrmv(layout, triangle, a_transpose, diagonal, n, Buffer(a_buffer), a_offset, a_ld, @@ -1089,7 +1067,6 @@ StatusCode Tbmv(const Layout layout, const Triangle triangle, const Transpose a_ try { auto queue_cpp = Queue(*queue); auto routine = Xtbmv(queue_cpp, event); - routine.SetUp(); routine.DoTbmv(layout, triangle, a_transpose, diagonal, n, k, Buffer(a_buffer), a_offset, a_ld, @@ -1133,7 +1110,6 @@ StatusCode Tpmv(const Layout layout, const Triangle triangle, const Transpose a_ try { auto queue_cpp = Queue(*queue); auto routine = Xtpmv(queue_cpp, event); - routine.SetUp(); routine.DoTpmv(layout, triangle, a_transpose, diagonal, n, Buffer(ap_buffer), ap_offset, @@ -1269,7 +1245,6 @@ StatusCode Ger(const Layout layout, try { auto queue_cpp = Queue(*queue); auto routine = Xger(queue_cpp, event); - routine.SetUp(); routine.DoGer(layout, m, n, alpha, @@ -1313,7 +1288,6 @@ StatusCode Geru(const Layout layout, try { auto queue_cpp = Queue(*queue); auto routine = Xgeru(queue_cpp, event); - routine.SetUp(); routine.DoGeru(layout, m, n, alpha, @@ -1350,7 +1324,6 @@ StatusCode Gerc(const Layout layout, try { auto queue_cpp = Queue(*queue); auto routine = Xgerc(queue_cpp, event); - routine.SetUp(); routine.DoGerc(layout, m, n, alpha, @@ -1386,7 +1359,6 @@ StatusCode 
Her(const Layout layout, const Triangle triangle, try { auto queue_cpp = Queue(*queue); auto routine = Xher,T>(queue_cpp, event); - routine.SetUp(); routine.DoHer(layout, triangle, n, alpha, @@ -1419,7 +1391,6 @@ StatusCode Hpr(const Layout layout, const Triangle triangle, try { auto queue_cpp = Queue(*queue); auto routine = Xhpr,T>(queue_cpp, event); - routine.SetUp(); routine.DoHpr(layout, triangle, n, alpha, @@ -1453,7 +1424,6 @@ StatusCode Her2(const Layout layout, const Triangle triangle, try { auto queue_cpp = Queue(*queue); auto routine = Xher2(queue_cpp, event); - routine.SetUp(); routine.DoHer2(layout, triangle, n, alpha, @@ -1490,7 +1460,6 @@ StatusCode Hpr2(const Layout layout, const Triangle triangle, try { auto queue_cpp = Queue(*queue); auto routine = Xhpr2(queue_cpp, event); - routine.SetUp(); routine.DoHpr2(layout, triangle, n, alpha, @@ -1526,7 +1495,6 @@ StatusCode Syr(const Layout layout, const Triangle triangle, try { auto queue_cpp = Queue(*queue); auto routine = Xsyr(queue_cpp, event); - routine.SetUp(); routine.DoSyr(layout, triangle, n, alpha, @@ -1565,7 +1533,6 @@ StatusCode Spr(const Layout layout, const Triangle triangle, try { auto queue_cpp = Queue(*queue); auto routine = Xspr(queue_cpp, event); - routine.SetUp(); routine.DoSpr(layout, triangle, n, alpha, @@ -1605,7 +1572,6 @@ StatusCode Syr2(const Layout layout, const Triangle triangle, try { auto queue_cpp = Queue(*queue); auto routine = Xsyr2(queue_cpp, event); - routine.SetUp(); routine.DoSyr2(layout, triangle, n, alpha, @@ -1649,7 +1615,6 @@ StatusCode Spr2(const Layout layout, const Triangle triangle, try { auto queue_cpp = Queue(*queue); auto routine = Xspr2(queue_cpp, event); - routine.SetUp(); routine.DoSpr2(layout, triangle, n, alpha, @@ -1698,7 +1663,6 @@ StatusCode Gemm(const Layout layout, const Transpose a_transpose, const Transpos try { auto queue_cpp = Queue(*queue); auto routine = Xgemm(queue_cpp, event); - routine.SetUp(); routine.DoGemm(layout, a_transpose, 
b_transpose, m, n, k, alpha, @@ -1763,7 +1727,6 @@ StatusCode Symm(const Layout layout, const Side side, const Triangle triangle, try { auto queue_cpp = Queue(*queue); auto routine = Xsymm(queue_cpp, event); - routine.SetUp(); routine.DoSymm(layout, side, triangle, m, n, alpha, @@ -1828,7 +1791,6 @@ StatusCode Hemm(const Layout layout, const Side side, const Triangle triangle, try { auto queue_cpp = Queue(*queue); auto routine = Xhemm(queue_cpp, event); - routine.SetUp(); routine.DoHemm(layout, side, triangle, m, n, alpha, @@ -1868,7 +1830,6 @@ StatusCode Syrk(const Layout layout, const Triangle triangle, const Transpose a_ try { auto queue_cpp = Queue(*queue); auto routine = Xsyrk(queue_cpp, event); - routine.SetUp(); routine.DoSyrk(layout, triangle, a_transpose, n, k, alpha, @@ -1926,7 +1887,6 @@ StatusCode Herk(const Layout layout, const Triangle triangle, const Transpose a_ try { auto queue_cpp = Queue(*queue); auto routine = Xherk,T>(queue_cpp, event); - routine.SetUp(); routine.DoHerk(layout, triangle, a_transpose, n, k, alpha, @@ -1964,7 +1924,6 @@ StatusCode Syr2k(const Layout layout, const Triangle triangle, const Transpose a try { auto queue_cpp = Queue(*queue); auto routine = Xsyr2k(queue_cpp, event); - routine.SetUp(); routine.DoSyr2k(layout, triangle, ab_transpose, n, k, alpha, @@ -2029,7 +1988,6 @@ StatusCode Her2k(const Layout layout, const Triangle triangle, const Transpose a try { auto queue_cpp = Queue(*queue); auto routine = Xher2k(queue_cpp, event); - routine.SetUp(); routine.DoHer2k(layout, triangle, ab_transpose, n, k, alpha, @@ -2068,7 +2026,6 @@ StatusCode Trmm(const Layout layout, const Side side, const Triangle triangle, c try { auto queue_cpp = Queue(*queue); auto routine = Xtrmm(queue_cpp, event); - routine.SetUp(); routine.DoTrmm(layout, side, triangle, a_transpose, diagonal, m, n, alpha, @@ -2164,7 +2121,6 @@ StatusCode Omatcopy(const Layout layout, const Transpose a_transpose, try { auto queue_cpp = Queue(*queue); auto routine = 
Xomatcopy(queue_cpp, event); - routine.SetUp(); routine.DoOmatcopy(layout, a_transpose, m, n, alpha, @@ -2225,57 +2181,57 @@ StatusCode FillCache(const cl_device_id device) { auto queue = Queue(context, device_cpp); // Runs all the level 1 set-up functions - Xswap(queue, nullptr).SetUp(); Xswap(queue, nullptr).SetUp(); Xswap(queue, nullptr).SetUp(); Xswap(queue, nullptr).SetUp(); - Xswap(queue, nullptr).SetUp(); Xswap(queue, nullptr).SetUp(); Xswap(queue, nullptr).SetUp(); Xswap(queue, nullptr).SetUp(); - Xscal(queue, nullptr).SetUp(); Xscal(queue, nullptr).SetUp(); Xscal(queue, nullptr).SetUp(); Xscal(queue, nullptr).SetUp(); - Xcopy(queue, nullptr).SetUp(); Xcopy(queue, nullptr).SetUp(); Xcopy(queue, nullptr).SetUp(); Xcopy(queue, nullptr).SetUp(); - Xaxpy(queue, nullptr).SetUp(); Xaxpy(queue, nullptr).SetUp(); Xaxpy(queue, nullptr).SetUp(); Xaxpy(queue, nullptr).SetUp(); - Xdot(queue, nullptr).SetUp(); Xdot(queue, nullptr).SetUp(); - Xdotu(queue, nullptr).SetUp(); Xdotu(queue, nullptr).SetUp(); - Xdotc(queue, nullptr).SetUp(); Xdotc(queue, nullptr).SetUp(); - Xnrm2(queue, nullptr).SetUp(); Xnrm2(queue, nullptr).SetUp(); Xnrm2(queue, nullptr).SetUp(); Xnrm2(queue, nullptr).SetUp(); - Xasum(queue, nullptr).SetUp(); Xasum(queue, nullptr).SetUp(); Xasum(queue, nullptr).SetUp(); Xasum(queue, nullptr).SetUp(); - Xsum(queue, nullptr).SetUp(); Xsum(queue, nullptr).SetUp(); Xsum(queue, nullptr).SetUp(); Xsum(queue, nullptr).SetUp(); - Xamax(queue, nullptr).SetUp(); Xamax(queue, nullptr).SetUp(); Xamax(queue, nullptr).SetUp(); Xamax(queue, nullptr).SetUp(); - Xmax(queue, nullptr).SetUp(); Xmax(queue, nullptr).SetUp(); Xmax(queue, nullptr).SetUp(); Xmax(queue, nullptr).SetUp(); - Xmin(queue, nullptr).SetUp(); Xmin(queue, nullptr).SetUp(); Xmin(queue, nullptr).SetUp(); Xmin(queue, nullptr).SetUp(); + Xswap(queue, nullptr); Xswap(queue, nullptr); Xswap(queue, nullptr); Xswap(queue, nullptr); + Xswap(queue, nullptr); Xswap(queue, nullptr); Xswap(queue, nullptr); Xswap(queue, 
nullptr); + Xscal(queue, nullptr); Xscal(queue, nullptr); Xscal(queue, nullptr); Xscal(queue, nullptr); + Xcopy(queue, nullptr); Xcopy(queue, nullptr); Xcopy(queue, nullptr); Xcopy(queue, nullptr); + Xaxpy(queue, nullptr); Xaxpy(queue, nullptr); Xaxpy(queue, nullptr); Xaxpy(queue, nullptr); + Xdot(queue, nullptr); Xdot(queue, nullptr); + Xdotu(queue, nullptr); Xdotu(queue, nullptr); + Xdotc(queue, nullptr); Xdotc(queue, nullptr); + Xnrm2(queue, nullptr); Xnrm2(queue, nullptr); Xnrm2(queue, nullptr); Xnrm2(queue, nullptr); + Xasum(queue, nullptr); Xasum(queue, nullptr); Xasum(queue, nullptr); Xasum(queue, nullptr); + Xsum(queue, nullptr); Xsum(queue, nullptr); Xsum(queue, nullptr); Xsum(queue, nullptr); + Xamax(queue, nullptr); Xamax(queue, nullptr); Xamax(queue, nullptr); Xamax(queue, nullptr); + Xmax(queue, nullptr); Xmax(queue, nullptr); Xmax(queue, nullptr); Xmax(queue, nullptr); + Xmin(queue, nullptr); Xmin(queue, nullptr); Xmin(queue, nullptr); Xmin(queue, nullptr); // Runs all the level 2 set-up functions - Xgemv(queue, nullptr).SetUp(); Xgemv(queue, nullptr).SetUp(); Xgemv(queue, nullptr).SetUp(); Xgemv(queue, nullptr).SetUp(); - Xgbmv(queue, nullptr).SetUp(); Xgbmv(queue, nullptr).SetUp(); Xgbmv(queue, nullptr).SetUp(); Xgbmv(queue, nullptr).SetUp(); - Xhemv(queue, nullptr).SetUp(); Xhemv(queue, nullptr).SetUp(); - Xhbmv(queue, nullptr).SetUp(); Xhbmv(queue, nullptr).SetUp(); - Xhpmv(queue, nullptr).SetUp(); Xhpmv(queue, nullptr).SetUp(); - Xsymv(queue, nullptr).SetUp(); Xsymv(queue, nullptr).SetUp(); - Xsbmv(queue, nullptr).SetUp(); Xsbmv(queue, nullptr).SetUp(); - Xspmv(queue, nullptr).SetUp(); Xspmv(queue, nullptr).SetUp(); - Xtrmv(queue, nullptr).SetUp(); Xtrmv(queue, nullptr).SetUp(); Xtrmv(queue, nullptr).SetUp(); Xtrmv(queue, nullptr).SetUp(); - Xtbmv(queue, nullptr).SetUp(); Xtbmv(queue, nullptr).SetUp(); Xtbmv(queue, nullptr).SetUp(); Xtbmv(queue, nullptr).SetUp(); - Xtpmv(queue, nullptr).SetUp(); Xtpmv(queue, nullptr).SetUp(); Xtpmv(queue, 
nullptr).SetUp(); Xtpmv(queue, nullptr).SetUp(); - Xger(queue, nullptr).SetUp(); Xger(queue, nullptr).SetUp(); - Xgeru(queue, nullptr).SetUp(); Xgeru(queue, nullptr).SetUp(); - Xgerc(queue, nullptr).SetUp(); Xgerc(queue, nullptr).SetUp(); - Xher(queue, nullptr).SetUp(); Xher(queue, nullptr).SetUp(); - Xhpr(queue, nullptr).SetUp(); Xhpr(queue, nullptr).SetUp(); - Xher2(queue, nullptr).SetUp(); Xher2(queue, nullptr).SetUp(); - Xhpr2(queue, nullptr).SetUp(); Xhpr2(queue, nullptr).SetUp(); - Xsyr(queue, nullptr).SetUp(); Xsyr(queue, nullptr).SetUp(); - Xspr(queue, nullptr).SetUp(); Xspr(queue, nullptr).SetUp(); - Xsyr2(queue, nullptr).SetUp(); Xsyr2(queue, nullptr).SetUp(); - Xspr2(queue, nullptr).SetUp(); Xspr2(queue, nullptr).SetUp(); + Xgemv(queue, nullptr); Xgemv(queue, nullptr); Xgemv(queue, nullptr); Xgemv(queue, nullptr); + Xgbmv(queue, nullptr); Xgbmv(queue, nullptr); Xgbmv(queue, nullptr); Xgbmv(queue, nullptr); + Xhemv(queue, nullptr); Xhemv(queue, nullptr); + Xhbmv(queue, nullptr); Xhbmv(queue, nullptr); + Xhpmv(queue, nullptr); Xhpmv(queue, nullptr); + Xsymv(queue, nullptr); Xsymv(queue, nullptr); + Xsbmv(queue, nullptr); Xsbmv(queue, nullptr); + Xspmv(queue, nullptr); Xspmv(queue, nullptr); + Xtrmv(queue, nullptr); Xtrmv(queue, nullptr); Xtrmv(queue, nullptr); Xtrmv(queue, nullptr); + Xtbmv(queue, nullptr); Xtbmv(queue, nullptr); Xtbmv(queue, nullptr); Xtbmv(queue, nullptr); + Xtpmv(queue, nullptr); Xtpmv(queue, nullptr); Xtpmv(queue, nullptr); Xtpmv(queue, nullptr); + Xger(queue, nullptr); Xger(queue, nullptr); + Xgeru(queue, nullptr); Xgeru(queue, nullptr); + Xgerc(queue, nullptr); Xgerc(queue, nullptr); + Xher(queue, nullptr); Xher(queue, nullptr); + Xhpr(queue, nullptr); Xhpr(queue, nullptr); + Xher2(queue, nullptr); Xher2(queue, nullptr); + Xhpr2(queue, nullptr); Xhpr2(queue, nullptr); + Xsyr(queue, nullptr); Xsyr(queue, nullptr); + Xspr(queue, nullptr); Xspr(queue, nullptr); + Xsyr2(queue, nullptr); Xsyr2(queue, nullptr); + Xspr2(queue, nullptr); 
Xspr2(queue, nullptr); // Runs all the level 3 set-up functions - Xgemm(queue, nullptr).SetUp(); Xgemm(queue, nullptr).SetUp(); Xgemm(queue, nullptr).SetUp(); Xgemm(queue, nullptr).SetUp(); - Xsymm(queue, nullptr).SetUp(); Xsymm(queue, nullptr).SetUp(); Xsymm(queue, nullptr).SetUp(); Xsymm(queue, nullptr).SetUp(); - Xhemm(queue, nullptr).SetUp(); Xhemm(queue, nullptr).SetUp(); - Xsyrk(queue, nullptr).SetUp(); Xsyrk(queue, nullptr).SetUp(); Xsyrk(queue, nullptr).SetUp(); Xsyrk(queue, nullptr).SetUp(); - Xherk(queue, nullptr).SetUp(); Xherk(queue, nullptr).SetUp(); - Xsyr2k(queue, nullptr).SetUp(); Xsyr2k(queue, nullptr).SetUp(); Xsyr2k(queue, nullptr).SetUp(); Xsyr2k(queue, nullptr).SetUp(); - Xher2k(queue, nullptr).SetUp(); Xher2k(queue, nullptr).SetUp(); - Xtrmm(queue, nullptr).SetUp(); Xtrmm(queue, nullptr).SetUp(); Xtrmm(queue, nullptr).SetUp(); Xtrmm(queue, nullptr).SetUp(); + Xgemm(queue, nullptr); Xgemm(queue, nullptr); Xgemm(queue, nullptr); Xgemm(queue, nullptr); + Xsymm(queue, nullptr); Xsymm(queue, nullptr); Xsymm(queue, nullptr); Xsymm(queue, nullptr); + Xhemm(queue, nullptr); Xhemm(queue, nullptr); + Xsyrk(queue, nullptr); Xsyrk(queue, nullptr); Xsyrk(queue, nullptr); Xsyrk(queue, nullptr); + Xherk(queue, nullptr); Xherk(queue, nullptr); + Xsyr2k(queue, nullptr); Xsyr2k(queue, nullptr); Xsyr2k(queue, nullptr); Xsyr2k(queue, nullptr); + Xher2k(queue, nullptr); Xher2k(queue, nullptr); + Xtrmm(queue, nullptr); Xtrmm(queue, nullptr); Xtrmm(queue, nullptr); Xtrmm(queue, nullptr); // Runs all the level 3 set-up functions - Xomatcopy(queue, nullptr).SetUp(); Xomatcopy(queue, nullptr).SetUp(); Xomatcopy(queue, nullptr).SetUp(); Xomatcopy(queue, nullptr).SetUp(); + Xomatcopy(queue, nullptr); Xomatcopy(queue, nullptr); Xomatcopy(queue, nullptr); Xomatcopy(queue, nullptr); } catch (...) 
{ return DispatchException(); } return StatusCode::kSuccess; diff --git a/src/routine.cpp b/src/routine.cpp index 5e3a9dfe..acafb0d2 100644 --- a/src/routine.cpp +++ b/src/routine.cpp @@ -21,10 +21,11 @@ namespace clblast { // ================================================================================================= -// Constructor: not much here, because no status codes can be returned +// The constructor does all heavy work, errors are returned as exceptions Routine::Routine(Queue &queue, EventPointer event, const std::string &name, const std::vector &routines, const Precision precision, - const std::vector &userDatabase): + const std::vector &userDatabase, + std::initializer_list source): precision_(precision), routine_name_(name), queue_(queue), @@ -33,12 +34,6 @@ Routine::Routine(Queue &queue, EventPointer event, const std::string &name, device_(queue_.GetDevice()), device_name_(device_.Name()), db_(queue_, routines, precision_, userDatabase) { -} - -// ================================================================================================= - -// Separate set-up function to allow for status codes to be returned -void Routine::SetUp() { // Queries the cache to see whether or not the program (context-specific) is already there if (ProgramIsInCache(context_, precision_, routine_name_)) { return; } @@ -77,37 +72,39 @@ void Routine::SetUp() { } } - // Loads the common header (typedefs and defines and such) - std::string common_header = - #include "kernels/common.opencl" - ; - // Collects the parameters for this device in the form of defines, and adds the precision - auto defines = db_.GetDefines(); - defines += "#define PRECISION "+ToString(static_cast(precision_))+"\n"; + auto source_string = db_.GetDefines(); + source_string += "#define PRECISION "+ToString(static_cast(precision_))+"\n"; // Adds the name of the routine as a define - defines += "#define ROUTINE_"+routine_name_+"\n"; + source_string += "#define ROUTINE_"+routine_name_+"\n"; // For 
specific devices, use the non-IEE754 compilant OpenCL mad() instruction. This can improve // performance, but might result in a reduced accuracy. if (device_.IsAMD() && device_.IsGPU()) { - defines += "#define USE_CL_MAD 1\n"; + source_string += "#define USE_CL_MAD 1\n"; } // For specific devices, use staggered/shuffled workgroup indices. if (device_.IsAMD() && device_.IsGPU()) { - defines += "#define USE_STAGGERED_INDICES 1\n"; + source_string += "#define USE_STAGGERED_INDICES 1\n"; } // For specific devices add a global synchronisation barrier to the GEMM kernel to optimize // performance through better cache behaviour if (device_.IsARM() && device_.IsGPU()) { - defines += "#define GLOBAL_MEM_FENCE 1\n"; + source_string += "#define GLOBAL_MEM_FENCE 1\n"; } - // Combines everything together into a single source string - const auto source_string = defines + common_header + source_string_; + // Loads the common header (typedefs and defines and such) + source_string += + #include "kernels/common.opencl" + ; + + // Adds routine-specific code to the constructed source string + for (const char *s: source) { + source_string += s; + } // Prints details of the routine to compile in case of debugging in verbose mode #ifdef VERBOSE diff --git a/src/routine.hpp b/src/routine.hpp index c2bcdaff..f4ad435e 100644 --- a/src/routine.hpp +++ b/src/routine.hpp @@ -34,21 +34,19 @@ class Routine { // Base class constructor. The user database is an optional extra database to override the // built-in database. + // All heavy preparation work is done inside this constructor. 
explicit Routine(Queue &queue, EventPointer event, const std::string &name, const std::vector &routines, const Precision precision, - const std::vector &userDatabase = {}); - - // Set-up phase of the kernel - void SetUp(); + const std::vector &userDatabase, + std::initializer_list source); protected: // Non-static variable for the precision const Precision precision_; - // The routine's name and its kernel-source in string form + // The routine's name const std::string routine_name_; - std::string source_string_; // The OpenCL objects, accessible only from derived classes Queue queue_; diff --git a/src/routines/level1/xamax.cpp b/src/routines/level1/xamax.cpp index 8307188b..e9efa1a7 100644 --- a/src/routines/level1/xamax.cpp +++ b/src/routines/level1/xamax.cpp @@ -22,10 +22,9 @@ namespace clblast { // Constructor: forwards to base class constructor template Xamax::Xamax(Queue &queue, EventPointer event, const std::string &name): - Routine(queue, event, name, {"Xdot"}, PrecisionValue()) { - source_string_ = + Routine(queue, event, name, {"Xdot"}, PrecisionValue(), {}, { #include "../../kernels/level1/xamax.opencl" - ; + }) { } // ================================================================================================= diff --git a/src/routines/level1/xasum.cpp b/src/routines/level1/xasum.cpp index 9dde7a87..a242a5fa 100644 --- a/src/routines/level1/xasum.cpp +++ b/src/routines/level1/xasum.cpp @@ -22,10 +22,9 @@ namespace clblast { // Constructor: forwards to base class constructor template Xasum::Xasum(Queue &queue, EventPointer event, const std::string &name): - Routine(queue, event, name, {"Xdot"}, PrecisionValue()) { - source_string_ = + Routine(queue, event, name, {"Xdot"}, PrecisionValue(), {}, { #include "../../kernels/level1/xasum.opencl" - ; + }) { } // ================================================================================================= diff --git a/src/routines/level1/xaxpy.cpp b/src/routines/level1/xaxpy.cpp index cbcbb3cd..5436c5b7 
100644 --- a/src/routines/level1/xaxpy.cpp +++ b/src/routines/level1/xaxpy.cpp @@ -22,11 +22,10 @@ namespace clblast { // Constructor: forwards to base class constructor template Xaxpy::Xaxpy(Queue &queue, EventPointer event, const std::string &name): - Routine(queue, event, name, {"Xaxpy"}, PrecisionValue()) { - source_string_ = + Routine(queue, event, name, {"Xaxpy"}, PrecisionValue(), {}, { #include "../../kernels/level1/level1.opencl" #include "../../kernels/level1/xaxpy.opencl" - ; + }) { } // ================================================================================================= diff --git a/src/routines/level1/xcopy.cpp b/src/routines/level1/xcopy.cpp index 3bfbada6..d86200c0 100644 --- a/src/routines/level1/xcopy.cpp +++ b/src/routines/level1/xcopy.cpp @@ -22,11 +22,10 @@ namespace clblast { // Constructor: forwards to base class constructor template Xcopy::Xcopy(Queue &queue, EventPointer event, const std::string &name): - Routine(queue, event, name, {"Xaxpy"}, PrecisionValue()) { - source_string_ = + Routine(queue, event, name, {"Xaxpy"}, PrecisionValue(), {}, { #include "../../kernels/level1/level1.opencl" #include "../../kernels/level1/xcopy.opencl" - ; + }) { } // ================================================================================================= diff --git a/src/routines/level1/xdot.cpp b/src/routines/level1/xdot.cpp index e0d297f8..9d718913 100644 --- a/src/routines/level1/xdot.cpp +++ b/src/routines/level1/xdot.cpp @@ -22,10 +22,9 @@ namespace clblast { // Constructor: forwards to base class constructor template Xdot::Xdot(Queue &queue, EventPointer event, const std::string &name): - Routine(queue, event, name, {"Xdot"}, PrecisionValue()) { - source_string_ = + Routine(queue, event, name, {"Xdot"}, PrecisionValue(), {}, { #include "../../kernels/level1/xdot.opencl" - ; + }) { } // ================================================================================================= diff --git a/src/routines/level1/xnrm2.cpp 
b/src/routines/level1/xnrm2.cpp index eb795498..373820a4 100644 --- a/src/routines/level1/xnrm2.cpp +++ b/src/routines/level1/xnrm2.cpp @@ -22,10 +22,9 @@ namespace clblast { // Constructor: forwards to base class constructor template Xnrm2::Xnrm2(Queue &queue, EventPointer event, const std::string &name): - Routine(queue, event, name, {"Xdot"}, PrecisionValue()) { - source_string_ = + Routine(queue, event, name, {"Xdot"}, PrecisionValue(), {}, { #include "../../kernels/level1/xnrm2.opencl" - ; + }) { } // ================================================================================================= diff --git a/src/routines/level1/xscal.cpp b/src/routines/level1/xscal.cpp index ed126879..17410f01 100644 --- a/src/routines/level1/xscal.cpp +++ b/src/routines/level1/xscal.cpp @@ -22,11 +22,10 @@ namespace clblast { // Constructor: forwards to base class constructor template Xscal::Xscal(Queue &queue, EventPointer event, const std::string &name): - Routine(queue, event, name, {"Xaxpy"}, PrecisionValue()) { - source_string_ = + Routine(queue, event, name, {"Xaxpy"}, PrecisionValue(), {}, { #include "../../kernels/level1/level1.opencl" #include "../../kernels/level1/xscal.opencl" - ; + }) { } // ================================================================================================= diff --git a/src/routines/level1/xswap.cpp b/src/routines/level1/xswap.cpp index 2f2c0370..c9b97dc9 100644 --- a/src/routines/level1/xswap.cpp +++ b/src/routines/level1/xswap.cpp @@ -22,11 +22,10 @@ namespace clblast { // Constructor: forwards to base class constructor template Xswap::Xswap(Queue &queue, EventPointer event, const std::string &name): - Routine(queue, event, name, {"Xaxpy"}, PrecisionValue()) { - source_string_ = + Routine(queue, event, name, {"Xaxpy"}, PrecisionValue(), {}, { #include "../../kernels/level1/level1.opencl" #include "../../kernels/level1/xswap.opencl" - ; + }) { } // 
================================================================================================= diff --git a/src/routines/level2/xgemv.cpp b/src/routines/level2/xgemv.cpp index 97dcd8ef..7b4c2e8f 100644 --- a/src/routines/level2/xgemv.cpp +++ b/src/routines/level2/xgemv.cpp @@ -22,11 +22,10 @@ namespace clblast { // Constructor: forwards to base class constructor template Xgemv::Xgemv(Queue &queue, EventPointer event, const std::string &name): - Routine(queue, event, name, {"Pad", "Xgemv", "XgemvFast", "XgemvFastRot"}, PrecisionValue()) { - source_string_ = + Routine(queue, event, name, {"Pad", "Xgemv", "XgemvFast", "XgemvFastRot"}, PrecisionValue(), {}, { #include "../../kernels/level2/xgemv.opencl" #include "../../kernels/level2/xgemv_fast.opencl" - ; + }) { } // ================================================================================================= diff --git a/src/routines/level2/xger.cpp b/src/routines/level2/xger.cpp index f22131bb..d16ebd11 100644 --- a/src/routines/level2/xger.cpp +++ b/src/routines/level2/xger.cpp @@ -22,11 +22,10 @@ namespace clblast { // Constructor: forwards to base class constructor template Xger::Xger(Queue &queue, EventPointer event, const std::string &name): - Routine(queue, event, name, {"Xger"}, PrecisionValue()) { - source_string_ = + Routine(queue, event, name, {"Xger"}, PrecisionValue(), {}, { #include "../../kernels/level2/level2.opencl" #include "../../kernels/level2/xger.opencl" - ; + }) { } // ================================================================================================= diff --git a/src/routines/level2/xher.cpp b/src/routines/level2/xher.cpp index 4cf27702..6c334e63 100644 --- a/src/routines/level2/xher.cpp +++ b/src/routines/level2/xher.cpp @@ -21,11 +21,10 @@ namespace clblast { // Constructor: forwards to base class constructor template Xher::Xher(Queue &queue, EventPointer event, const std::string &name): - Routine(queue, event, name, {"Xger"}, PrecisionValue()) { - source_string_ = + 
Routine(queue, event, name, {"Xger"}, PrecisionValue(), {}, { #include "../../kernels/level2/level2.opencl" #include "../../kernels/level2/xher.opencl" - ; + }) { } // ================================================================================================= diff --git a/src/routines/level2/xher2.cpp b/src/routines/level2/xher2.cpp index c93585de..11e2c871 100644 --- a/src/routines/level2/xher2.cpp +++ b/src/routines/level2/xher2.cpp @@ -21,11 +21,10 @@ namespace clblast { // Constructor: forwards to base class constructor template Xher2::Xher2(Queue &queue, EventPointer event, const std::string &name): - Routine(queue, event, name, {"Xger"}, PrecisionValue()) { - source_string_ = + Routine(queue, event, name, {"Xger"}, PrecisionValue(), {}, { #include "../../kernels/level2/level2.opencl" #include "../../kernels/level2/xher2.opencl" - ; + }) { } // ================================================================================================= diff --git a/src/routines/level3/xgemm.cpp b/src/routines/level3/xgemm.cpp index a6f7c286..4f70dc7a 100644 --- a/src/routines/level3/xgemm.cpp +++ b/src/routines/level3/xgemm.cpp @@ -24,8 +24,7 @@ template Xgemm::Xgemm(Queue &queue, EventPointer event, const std::string &name): Routine(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm","XgemmDirect","KernelSelection"}, - PrecisionValue()) { - source_string_ = + PrecisionValue(), {}, { #include "../../kernels/level3/level3.opencl" #include "../../kernels/level3/copy_fast.opencl" #include "../../kernels/level3/copy_pad.opencl" @@ -37,13 +36,11 @@ Xgemm::Xgemm(Queue &queue, EventPointer event, const std::string &name): #include "../../kernels/level3/xgemm_direct_part1.opencl" #include "../../kernels/level3/xgemm_direct_part2.opencl" #include "../../kernels/level3/xgemm_direct_part3.opencl" - ; - auto source_string_part_2 = // separated in two parts to prevent C1091 in MSVC 2013 + , // separated in two parts to prevent C1091 in MSVC 2013 #include 
"../../kernels/level3/xgemm_part1.opencl" #include "../../kernels/level3/xgemm_part2.opencl" #include "../../kernels/level3/xgemm_part3.opencl" - ; - source_string_ += source_string_part_2; + }) { } // ================================================================================================= diff --git a/src/routines/level3/xher2k.cpp b/src/routines/level3/xher2k.cpp index a326dfbe..7244c848 100644 --- a/src/routines/level3/xher2k.cpp +++ b/src/routines/level3/xher2k.cpp @@ -22,8 +22,7 @@ namespace clblast { // Constructor: forwards to base class constructor template Xher2k::Xher2k(Queue &queue, EventPointer event, const std::string &name): - Routine(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, PrecisionValue()) { - source_string_ = + Routine(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, PrecisionValue(), {}, { #include "../../kernels/level3/level3.opencl" #include "../../kernels/level3/copy_fast.opencl" #include "../../kernels/level3/copy_pad.opencl" @@ -32,7 +31,7 @@ Xher2k::Xher2k(Queue &queue, EventPointer event, const std::string &name): #include "../../kernels/level3/xgemm_part1.opencl" #include "../../kernels/level3/xgemm_part2.opencl" #include "../../kernels/level3/xgemm_part3.opencl" - ; + }) { } // ================================================================================================= diff --git a/src/routines/level3/xherk.cpp b/src/routines/level3/xherk.cpp index 6e36714e..865c6c37 100644 --- a/src/routines/level3/xherk.cpp +++ b/src/routines/level3/xherk.cpp @@ -22,8 +22,7 @@ namespace clblast { // Constructor: forwards to base class constructor template Xherk::Xherk(Queue &queue, EventPointer event, const std::string &name): - Routine(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, PrecisionValue()) { - source_string_ = + Routine(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, PrecisionValue(), {}, { #include 
"../../kernels/level3/level3.opencl" #include "../../kernels/level3/copy_fast.opencl" #include "../../kernels/level3/copy_pad.opencl" @@ -32,7 +31,7 @@ Xherk::Xherk(Queue &queue, EventPointer event, const std::string &name): #include "../../kernels/level3/xgemm_part1.opencl" #include "../../kernels/level3/xgemm_part2.opencl" #include "../../kernels/level3/xgemm_part3.opencl" - ; + }) { } // ================================================================================================= diff --git a/src/routines/level3/xsyr2k.cpp b/src/routines/level3/xsyr2k.cpp index b10ee586..826854a8 100644 --- a/src/routines/level3/xsyr2k.cpp +++ b/src/routines/level3/xsyr2k.cpp @@ -22,8 +22,7 @@ namespace clblast { // Constructor: forwards to base class constructor template Xsyr2k::Xsyr2k(Queue &queue, EventPointer event, const std::string &name): - Routine(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, PrecisionValue()) { - source_string_ = + Routine(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, PrecisionValue(), {}, { #include "../../kernels/level3/level3.opencl" #include "../../kernels/level3/copy_fast.opencl" #include "../../kernels/level3/copy_pad.opencl" @@ -32,7 +31,7 @@ Xsyr2k::Xsyr2k(Queue &queue, EventPointer event, const std::string &name): #include "../../kernels/level3/xgemm_part1.opencl" #include "../../kernels/level3/xgemm_part2.opencl" #include "../../kernels/level3/xgemm_part3.opencl" - ; + }) { } // ================================================================================================= diff --git a/src/routines/level3/xsyrk.cpp b/src/routines/level3/xsyrk.cpp index 93fd4666..9aa8ca2d 100644 --- a/src/routines/level3/xsyrk.cpp +++ b/src/routines/level3/xsyrk.cpp @@ -22,8 +22,7 @@ namespace clblast { // Constructor: forwards to base class constructor template Xsyrk::Xsyrk(Queue &queue, EventPointer event, const std::string &name): - Routine(queue, event, name, 
{"Copy","Pad","Transpose","Padtranspose","Xgemm"}, PrecisionValue()) { - source_string_ = + Routine(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, PrecisionValue(), {}, { #include "../../kernels/level3/level3.opencl" #include "../../kernels/level3/copy_fast.opencl" #include "../../kernels/level3/copy_pad.opencl" @@ -32,7 +31,7 @@ Xsyrk::Xsyrk(Queue &queue, EventPointer event, const std::string &name): #include "../../kernels/level3/xgemm_part1.opencl" #include "../../kernels/level3/xgemm_part2.opencl" #include "../../kernels/level3/xgemm_part3.opencl" - ; + }) { } // ================================================================================================= diff --git a/src/routines/levelx/xomatcopy.cpp b/src/routines/levelx/xomatcopy.cpp index e053c076..875ca7d2 100644 --- a/src/routines/levelx/xomatcopy.cpp +++ b/src/routines/levelx/xomatcopy.cpp @@ -22,14 +22,13 @@ namespace clblast { // Constructor: forwards to base class constructor template Xomatcopy::Xomatcopy(Queue &queue, EventPointer event, const std::string &name): - Routine(queue, event, name, {"Copy","Pad","Transpose","Padtranspose"}, PrecisionValue()) { - source_string_ = + Routine(queue, event, name, {"Copy","Pad","Transpose","Padtranspose"}, PrecisionValue(), {}, { #include "../../kernels/level3/level3.opencl" #include "../../kernels/level3/copy_fast.opencl" #include "../../kernels/level3/copy_pad.opencl" #include "../../kernels/level3/transpose_fast.opencl" #include "../../kernels/level3/transpose_pad.opencl" - ; + }) { } // ================================================================================================= -- cgit v1.2.3