diff options
author | Ivan Shapovalov <intelfx@intelfx.name> | 2016-10-18 04:53:06 +0300 |
---|---|---|
committer | Ivan Shapovalov <intelfx@intelfx.name> | 2016-10-22 08:45:27 +0300 |
commit | 56f300607b1d0b81ab3269894fda5a066c46cdeb (patch) | |
tree | 60b4c5566cc4bbfad15a7791a4e20c2a60e16707 /src | |
parent | b98af44fcf89b9946e1de438b1f5527e6bf28905 (diff) |
Routine: get rid of ::SetUp()
Since we now use C++ exceptions inside the implementation (and exceptions
can be thrown from constructors), there is no need for a separate
Routine::SetUp() function.
To make this possible, we also change how the kernel source string is constructed.
The kernel-specific source code is now passed to the Routine constructor as
an initializer_list of C strings, which avoids unnecessary data copying
and also works around MSVC 2013's fatal error C1091 (string literal length limit).
Diffstat (limited to 'src')
-rw-r--r-- | src/clblast.cpp | 134 | ||||
-rw-r--r-- | src/routine.cpp | 39 | ||||
-rw-r--r-- | src/routine.hpp | 10 | ||||
-rw-r--r-- | src/routines/level1/xamax.cpp | 5 | ||||
-rw-r--r-- | src/routines/level1/xasum.cpp | 5 | ||||
-rw-r--r-- | src/routines/level1/xaxpy.cpp | 5 | ||||
-rw-r--r-- | src/routines/level1/xcopy.cpp | 5 | ||||
-rw-r--r-- | src/routines/level1/xdot.cpp | 5 | ||||
-rw-r--r-- | src/routines/level1/xnrm2.cpp | 5 | ||||
-rw-r--r-- | src/routines/level1/xscal.cpp | 5 | ||||
-rw-r--r-- | src/routines/level1/xswap.cpp | 5 | ||||
-rw-r--r-- | src/routines/level2/xgemv.cpp | 5 | ||||
-rw-r--r-- | src/routines/level2/xger.cpp | 5 | ||||
-rw-r--r-- | src/routines/level2/xher.cpp | 5 | ||||
-rw-r--r-- | src/routines/level2/xher2.cpp | 5 | ||||
-rw-r--r-- | src/routines/level3/xgemm.cpp | 9 | ||||
-rw-r--r-- | src/routines/level3/xher2k.cpp | 5 | ||||
-rw-r--r-- | src/routines/level3/xherk.cpp | 5 | ||||
-rw-r--r-- | src/routines/level3/xsyr2k.cpp | 5 | ||||
-rw-r--r-- | src/routines/level3/xsyrk.cpp | 5 | ||||
-rw-r--r-- | src/routines/levelx/xomatcopy.cpp | 5 |
21 files changed, 104 insertions, 173 deletions
diff --git a/src/clblast.cpp b/src/clblast.cpp index 8e7b042f..4bb4e0b3 100644 --- a/src/clblast.cpp +++ b/src/clblast.cpp @@ -171,7 +171,6 @@ StatusCode Swap(const size_t n, try { auto queue_cpp = Queue(*queue); auto routine = Xswap<T>(queue_cpp, event); - routine.SetUp(); routine.DoSwap(n, Buffer<T>(x_buffer), x_offset, x_inc, Buffer<T>(y_buffer), y_offset, y_inc); @@ -208,7 +207,6 @@ StatusCode Scal(const size_t n, try { auto queue_cpp = Queue(*queue); auto routine = Xscal<T>(queue_cpp, event); - routine.SetUp(); routine.DoScal(n, alpha, Buffer<T>(x_buffer), x_offset, x_inc); @@ -245,7 +243,6 @@ StatusCode Copy(const size_t n, try { auto queue_cpp = Queue(*queue); auto routine = Xcopy<T>(queue_cpp, event); - routine.SetUp(); routine.DoCopy(n, Buffer<T>(x_buffer), x_offset, x_inc, Buffer<T>(y_buffer), y_offset, y_inc); @@ -283,7 +280,6 @@ StatusCode Axpy(const size_t n, try { auto queue_cpp = Queue(*queue); auto routine = Xaxpy<T>(queue_cpp, event); - routine.SetUp(); routine.DoAxpy(n, alpha, Buffer<T>(x_buffer), x_offset, x_inc, @@ -327,7 +323,6 @@ StatusCode Dot(const size_t n, try { auto queue_cpp = Queue(*queue); auto routine = Xdot<T>(queue_cpp, event); - routine.SetUp(); routine.DoDot(n, Buffer<T>(dot_buffer), dot_offset, Buffer<T>(x_buffer), x_offset, x_inc, @@ -361,7 +356,6 @@ StatusCode Dotu(const size_t n, try { auto queue_cpp = Queue(*queue); auto routine = Xdotu<T>(queue_cpp, event); - routine.SetUp(); routine.DoDotu(n, Buffer<T>(dot_buffer), dot_offset, Buffer<T>(x_buffer), x_offset, x_inc, @@ -390,7 +384,6 @@ StatusCode Dotc(const size_t n, try { auto queue_cpp = Queue(*queue); auto routine = Xdotc<T>(queue_cpp, event); - routine.SetUp(); routine.DoDotc(n, Buffer<T>(dot_buffer), dot_offset, Buffer<T>(x_buffer), x_offset, x_inc, @@ -418,7 +411,6 @@ StatusCode Nrm2(const size_t n, try { auto queue_cpp = Queue(*queue); auto routine = Xnrm2<T>(queue_cpp, event); - routine.SetUp(); routine.DoNrm2(n, Buffer<T>(nrm2_buffer), nrm2_offset, 
Buffer<T>(x_buffer), x_offset, x_inc); @@ -455,7 +447,6 @@ StatusCode Asum(const size_t n, try { auto queue_cpp = Queue(*queue); auto routine = Xasum<T>(queue_cpp, event); - routine.SetUp(); routine.DoAsum(n, Buffer<T>(asum_buffer), asum_offset, Buffer<T>(x_buffer), x_offset, x_inc); @@ -492,7 +483,6 @@ StatusCode Sum(const size_t n, try { auto queue_cpp = Queue(*queue); auto routine = Xsum<T>(queue_cpp, event); - routine.SetUp(); routine.DoSum(n, Buffer<T>(sum_buffer), sum_offset, Buffer<T>(x_buffer), x_offset, x_inc); @@ -529,7 +519,6 @@ StatusCode Amax(const size_t n, try { auto queue_cpp = Queue(*queue); auto routine = Xamax<T>(queue_cpp, event); - routine.SetUp(); routine.DoAmax(n, Buffer<unsigned int>(imax_buffer), imax_offset, Buffer<T>(x_buffer), x_offset, x_inc); @@ -566,7 +555,6 @@ StatusCode Max(const size_t n, try { auto queue_cpp = Queue(*queue); auto routine = Xmax<T>(queue_cpp, event); - routine.SetUp(); routine.DoMax(n, Buffer<unsigned int>(imax_buffer), imax_offset, Buffer<T>(x_buffer), x_offset, x_inc); @@ -603,7 +591,6 @@ StatusCode Min(const size_t n, try { auto queue_cpp = Queue(*queue); auto routine = Xmin<T>(queue_cpp, event); - routine.SetUp(); routine.DoMin(n, Buffer<unsigned int>(imin_buffer), imin_offset, Buffer<T>(x_buffer), x_offset, x_inc); @@ -648,7 +635,6 @@ StatusCode Gemv(const Layout layout, const Transpose a_transpose, try { auto queue_cpp = Queue(*queue); auto routine = Xgemv<T>(queue_cpp, event); - routine.SetUp(); routine.DoGemv(layout, a_transpose, m, n, alpha, @@ -713,7 +699,6 @@ StatusCode Gbmv(const Layout layout, const Transpose a_transpose, try { auto queue_cpp = Queue(*queue); auto routine = Xgbmv<T>(queue_cpp, event); - routine.SetUp(); routine.DoGbmv(layout, a_transpose, m, n, kl, ku, alpha, @@ -778,7 +763,6 @@ StatusCode Hemv(const Layout layout, const Triangle triangle, try { auto queue_cpp = Queue(*queue); auto routine = Xhemv<T>(queue_cpp, event); - routine.SetUp(); routine.DoHemv(layout, triangle, n, alpha, @@ 
-819,7 +803,6 @@ StatusCode Hbmv(const Layout layout, const Triangle triangle, try { auto queue_cpp = Queue(*queue); auto routine = Xhbmv<T>(queue_cpp, event); - routine.SetUp(); routine.DoHbmv(layout, triangle, n, k, alpha, @@ -860,7 +843,6 @@ StatusCode Hpmv(const Layout layout, const Triangle triangle, try { auto queue_cpp = Queue(*queue); auto routine = Xhpmv<T>(queue_cpp, event); - routine.SetUp(); routine.DoHpmv(layout, triangle, n, alpha, @@ -901,7 +883,6 @@ StatusCode Symv(const Layout layout, const Triangle triangle, try { auto queue_cpp = Queue(*queue); auto routine = Xsymv<T>(queue_cpp, event); - routine.SetUp(); routine.DoSymv(layout, triangle, n, alpha, @@ -950,7 +931,6 @@ StatusCode Sbmv(const Layout layout, const Triangle triangle, try { auto queue_cpp = Queue(*queue); auto routine = Xsbmv<T>(queue_cpp, event); - routine.SetUp(); routine.DoSbmv(layout, triangle, n, k, alpha, @@ -999,7 +979,6 @@ StatusCode Spmv(const Layout layout, const Triangle triangle, try { auto queue_cpp = Queue(*queue); auto routine = Xspmv<T>(queue_cpp, event); - routine.SetUp(); routine.DoSpmv(layout, triangle, n, alpha, @@ -1045,7 +1024,6 @@ StatusCode Trmv(const Layout layout, const Triangle triangle, const Transpose a_ try { auto queue_cpp = Queue(*queue); auto routine = Xtrmv<T>(queue_cpp, event); - routine.SetUp(); routine.DoTrmv(layout, triangle, a_transpose, diagonal, n, Buffer<T>(a_buffer), a_offset, a_ld, @@ -1089,7 +1067,6 @@ StatusCode Tbmv(const Layout layout, const Triangle triangle, const Transpose a_ try { auto queue_cpp = Queue(*queue); auto routine = Xtbmv<T>(queue_cpp, event); - routine.SetUp(); routine.DoTbmv(layout, triangle, a_transpose, diagonal, n, k, Buffer<T>(a_buffer), a_offset, a_ld, @@ -1133,7 +1110,6 @@ StatusCode Tpmv(const Layout layout, const Triangle triangle, const Transpose a_ try { auto queue_cpp = Queue(*queue); auto routine = Xtpmv<T>(queue_cpp, event); - routine.SetUp(); routine.DoTpmv(layout, triangle, a_transpose, diagonal, n, 
Buffer<T>(ap_buffer), ap_offset, @@ -1269,7 +1245,6 @@ StatusCode Ger(const Layout layout, try { auto queue_cpp = Queue(*queue); auto routine = Xger<T>(queue_cpp, event); - routine.SetUp(); routine.DoGer(layout, m, n, alpha, @@ -1313,7 +1288,6 @@ StatusCode Geru(const Layout layout, try { auto queue_cpp = Queue(*queue); auto routine = Xgeru<T>(queue_cpp, event); - routine.SetUp(); routine.DoGeru(layout, m, n, alpha, @@ -1350,7 +1324,6 @@ StatusCode Gerc(const Layout layout, try { auto queue_cpp = Queue(*queue); auto routine = Xgerc<T>(queue_cpp, event); - routine.SetUp(); routine.DoGerc(layout, m, n, alpha, @@ -1386,7 +1359,6 @@ StatusCode Her(const Layout layout, const Triangle triangle, try { auto queue_cpp = Queue(*queue); auto routine = Xher<std::complex<T>,T>(queue_cpp, event); - routine.SetUp(); routine.DoHer(layout, triangle, n, alpha, @@ -1419,7 +1391,6 @@ StatusCode Hpr(const Layout layout, const Triangle triangle, try { auto queue_cpp = Queue(*queue); auto routine = Xhpr<std::complex<T>,T>(queue_cpp, event); - routine.SetUp(); routine.DoHpr(layout, triangle, n, alpha, @@ -1453,7 +1424,6 @@ StatusCode Her2(const Layout layout, const Triangle triangle, try { auto queue_cpp = Queue(*queue); auto routine = Xher2<T>(queue_cpp, event); - routine.SetUp(); routine.DoHer2(layout, triangle, n, alpha, @@ -1490,7 +1460,6 @@ StatusCode Hpr2(const Layout layout, const Triangle triangle, try { auto queue_cpp = Queue(*queue); auto routine = Xhpr2<T>(queue_cpp, event); - routine.SetUp(); routine.DoHpr2(layout, triangle, n, alpha, @@ -1526,7 +1495,6 @@ StatusCode Syr(const Layout layout, const Triangle triangle, try { auto queue_cpp = Queue(*queue); auto routine = Xsyr<T>(queue_cpp, event); - routine.SetUp(); routine.DoSyr(layout, triangle, n, alpha, @@ -1565,7 +1533,6 @@ StatusCode Spr(const Layout layout, const Triangle triangle, try { auto queue_cpp = Queue(*queue); auto routine = Xspr<T>(queue_cpp, event); - routine.SetUp(); routine.DoSpr(layout, triangle, n, alpha, @@ 
-1605,7 +1572,6 @@ StatusCode Syr2(const Layout layout, const Triangle triangle, try { auto queue_cpp = Queue(*queue); auto routine = Xsyr2<T>(queue_cpp, event); - routine.SetUp(); routine.DoSyr2(layout, triangle, n, alpha, @@ -1649,7 +1615,6 @@ StatusCode Spr2(const Layout layout, const Triangle triangle, try { auto queue_cpp = Queue(*queue); auto routine = Xspr2<T>(queue_cpp, event); - routine.SetUp(); routine.DoSpr2(layout, triangle, n, alpha, @@ -1698,7 +1663,6 @@ StatusCode Gemm(const Layout layout, const Transpose a_transpose, const Transpos try { auto queue_cpp = Queue(*queue); auto routine = Xgemm<T>(queue_cpp, event); - routine.SetUp(); routine.DoGemm(layout, a_transpose, b_transpose, m, n, k, alpha, @@ -1763,7 +1727,6 @@ StatusCode Symm(const Layout layout, const Side side, const Triangle triangle, try { auto queue_cpp = Queue(*queue); auto routine = Xsymm<T>(queue_cpp, event); - routine.SetUp(); routine.DoSymm(layout, side, triangle, m, n, alpha, @@ -1828,7 +1791,6 @@ StatusCode Hemm(const Layout layout, const Side side, const Triangle triangle, try { auto queue_cpp = Queue(*queue); auto routine = Xhemm<T>(queue_cpp, event); - routine.SetUp(); routine.DoHemm(layout, side, triangle, m, n, alpha, @@ -1868,7 +1830,6 @@ StatusCode Syrk(const Layout layout, const Triangle triangle, const Transpose a_ try { auto queue_cpp = Queue(*queue); auto routine = Xsyrk<T>(queue_cpp, event); - routine.SetUp(); routine.DoSyrk(layout, triangle, a_transpose, n, k, alpha, @@ -1926,7 +1887,6 @@ StatusCode Herk(const Layout layout, const Triangle triangle, const Transpose a_ try { auto queue_cpp = Queue(*queue); auto routine = Xherk<std::complex<T>,T>(queue_cpp, event); - routine.SetUp(); routine.DoHerk(layout, triangle, a_transpose, n, k, alpha, @@ -1964,7 +1924,6 @@ StatusCode Syr2k(const Layout layout, const Triangle triangle, const Transpose a try { auto queue_cpp = Queue(*queue); auto routine = Xsyr2k<T>(queue_cpp, event); - routine.SetUp(); routine.DoSyr2k(layout, 
triangle, ab_transpose, n, k, alpha, @@ -2029,7 +1988,6 @@ StatusCode Her2k(const Layout layout, const Triangle triangle, const Transpose a try { auto queue_cpp = Queue(*queue); auto routine = Xher2k<T,U>(queue_cpp, event); - routine.SetUp(); routine.DoHer2k(layout, triangle, ab_transpose, n, k, alpha, @@ -2068,7 +2026,6 @@ StatusCode Trmm(const Layout layout, const Side side, const Triangle triangle, c try { auto queue_cpp = Queue(*queue); auto routine = Xtrmm<T>(queue_cpp, event); - routine.SetUp(); routine.DoTrmm(layout, side, triangle, a_transpose, diagonal, m, n, alpha, @@ -2164,7 +2121,6 @@ StatusCode Omatcopy(const Layout layout, const Transpose a_transpose, try { auto queue_cpp = Queue(*queue); auto routine = Xomatcopy<T>(queue_cpp, event); - routine.SetUp(); routine.DoOmatcopy(layout, a_transpose, m, n, alpha, @@ -2225,57 +2181,57 @@ StatusCode FillCache(const cl_device_id device) { auto queue = Queue(context, device_cpp); // Runs all the level 1 set-up functions - Xswap<float>(queue, nullptr).SetUp(); Xswap<double>(queue, nullptr).SetUp(); Xswap<float2>(queue, nullptr).SetUp(); Xswap<double2>(queue, nullptr).SetUp(); - Xswap<float>(queue, nullptr).SetUp(); Xswap<double>(queue, nullptr).SetUp(); Xswap<float2>(queue, nullptr).SetUp(); Xswap<double2>(queue, nullptr).SetUp(); - Xscal<float>(queue, nullptr).SetUp(); Xscal<double>(queue, nullptr).SetUp(); Xscal<float2>(queue, nullptr).SetUp(); Xscal<double2>(queue, nullptr).SetUp(); - Xcopy<float>(queue, nullptr).SetUp(); Xcopy<double>(queue, nullptr).SetUp(); Xcopy<float2>(queue, nullptr).SetUp(); Xcopy<double2>(queue, nullptr).SetUp(); - Xaxpy<float>(queue, nullptr).SetUp(); Xaxpy<double>(queue, nullptr).SetUp(); Xaxpy<float2>(queue, nullptr).SetUp(); Xaxpy<double2>(queue, nullptr).SetUp(); - Xdot<float>(queue, nullptr).SetUp(); Xdot<double>(queue, nullptr).SetUp(); - Xdotu<float2>(queue, nullptr).SetUp(); Xdotu<double2>(queue, nullptr).SetUp(); - Xdotc<float2>(queue, nullptr).SetUp(); Xdotc<double2>(queue, 
nullptr).SetUp(); - Xnrm2<float>(queue, nullptr).SetUp(); Xnrm2<double>(queue, nullptr).SetUp(); Xnrm2<float2>(queue, nullptr).SetUp(); Xnrm2<double2>(queue, nullptr).SetUp(); - Xasum<float>(queue, nullptr).SetUp(); Xasum<double>(queue, nullptr).SetUp(); Xasum<float2>(queue, nullptr).SetUp(); Xasum<double2>(queue, nullptr).SetUp(); - Xsum<float>(queue, nullptr).SetUp(); Xsum<double>(queue, nullptr).SetUp(); Xsum<float2>(queue, nullptr).SetUp(); Xsum<double2>(queue, nullptr).SetUp(); - Xamax<float>(queue, nullptr).SetUp(); Xamax<double>(queue, nullptr).SetUp(); Xamax<float2>(queue, nullptr).SetUp(); Xamax<double2>(queue, nullptr).SetUp(); - Xmax<float>(queue, nullptr).SetUp(); Xmax<double>(queue, nullptr).SetUp(); Xmax<float2>(queue, nullptr).SetUp(); Xmax<double2>(queue, nullptr).SetUp(); - Xmin<float>(queue, nullptr).SetUp(); Xmin<double>(queue, nullptr).SetUp(); Xmin<float2>(queue, nullptr).SetUp(); Xmin<double2>(queue, nullptr).SetUp(); + Xswap<float>(queue, nullptr); Xswap<double>(queue, nullptr); Xswap<float2>(queue, nullptr); Xswap<double2>(queue, nullptr); + Xswap<float>(queue, nullptr); Xswap<double>(queue, nullptr); Xswap<float2>(queue, nullptr); Xswap<double2>(queue, nullptr); + Xscal<float>(queue, nullptr); Xscal<double>(queue, nullptr); Xscal<float2>(queue, nullptr); Xscal<double2>(queue, nullptr); + Xcopy<float>(queue, nullptr); Xcopy<double>(queue, nullptr); Xcopy<float2>(queue, nullptr); Xcopy<double2>(queue, nullptr); + Xaxpy<float>(queue, nullptr); Xaxpy<double>(queue, nullptr); Xaxpy<float2>(queue, nullptr); Xaxpy<double2>(queue, nullptr); + Xdot<float>(queue, nullptr); Xdot<double>(queue, nullptr); + Xdotu<float2>(queue, nullptr); Xdotu<double2>(queue, nullptr); + Xdotc<float2>(queue, nullptr); Xdotc<double2>(queue, nullptr); + Xnrm2<float>(queue, nullptr); Xnrm2<double>(queue, nullptr); Xnrm2<float2>(queue, nullptr); Xnrm2<double2>(queue, nullptr); + Xasum<float>(queue, nullptr); Xasum<double>(queue, nullptr); Xasum<float2>(queue, nullptr); 
Xasum<double2>(queue, nullptr); + Xsum<float>(queue, nullptr); Xsum<double>(queue, nullptr); Xsum<float2>(queue, nullptr); Xsum<double2>(queue, nullptr); + Xamax<float>(queue, nullptr); Xamax<double>(queue, nullptr); Xamax<float2>(queue, nullptr); Xamax<double2>(queue, nullptr); + Xmax<float>(queue, nullptr); Xmax<double>(queue, nullptr); Xmax<float2>(queue, nullptr); Xmax<double2>(queue, nullptr); + Xmin<float>(queue, nullptr); Xmin<double>(queue, nullptr); Xmin<float2>(queue, nullptr); Xmin<double2>(queue, nullptr); // Runs all the level 2 set-up functions - Xgemv<float>(queue, nullptr).SetUp(); Xgemv<double>(queue, nullptr).SetUp(); Xgemv<float2>(queue, nullptr).SetUp(); Xgemv<double2>(queue, nullptr).SetUp(); - Xgbmv<float>(queue, nullptr).SetUp(); Xgbmv<double>(queue, nullptr).SetUp(); Xgbmv<float2>(queue, nullptr).SetUp(); Xgbmv<double2>(queue, nullptr).SetUp(); - Xhemv<float2>(queue, nullptr).SetUp(); Xhemv<double2>(queue, nullptr).SetUp(); - Xhbmv<float2>(queue, nullptr).SetUp(); Xhbmv<double2>(queue, nullptr).SetUp(); - Xhpmv<float2>(queue, nullptr).SetUp(); Xhpmv<double2>(queue, nullptr).SetUp(); - Xsymv<float>(queue, nullptr).SetUp(); Xsymv<double>(queue, nullptr).SetUp(); - Xsbmv<float>(queue, nullptr).SetUp(); Xsbmv<double>(queue, nullptr).SetUp(); - Xspmv<float>(queue, nullptr).SetUp(); Xspmv<double>(queue, nullptr).SetUp(); - Xtrmv<float>(queue, nullptr).SetUp(); Xtrmv<double>(queue, nullptr).SetUp(); Xtrmv<float2>(queue, nullptr).SetUp(); Xtrmv<double2>(queue, nullptr).SetUp(); - Xtbmv<float>(queue, nullptr).SetUp(); Xtbmv<double>(queue, nullptr).SetUp(); Xtbmv<float2>(queue, nullptr).SetUp(); Xtbmv<double2>(queue, nullptr).SetUp(); - Xtpmv<float>(queue, nullptr).SetUp(); Xtpmv<double>(queue, nullptr).SetUp(); Xtpmv<float2>(queue, nullptr).SetUp(); Xtpmv<double2>(queue, nullptr).SetUp(); - Xger<float>(queue, nullptr).SetUp(); Xger<double>(queue, nullptr).SetUp(); - Xgeru<float2>(queue, nullptr).SetUp(); Xgeru<double2>(queue, nullptr).SetUp(); - 
Xgerc<float2>(queue, nullptr).SetUp(); Xgerc<double2>(queue, nullptr).SetUp(); - Xher<float2,float>(queue, nullptr).SetUp(); Xher<double2,double>(queue, nullptr).SetUp(); - Xhpr<float2,float>(queue, nullptr).SetUp(); Xhpr<double2,double>(queue, nullptr).SetUp(); - Xher2<float2>(queue, nullptr).SetUp(); Xher2<double2>(queue, nullptr).SetUp(); - Xhpr2<float2>(queue, nullptr).SetUp(); Xhpr2<double2>(queue, nullptr).SetUp(); - Xsyr<float>(queue, nullptr).SetUp(); Xsyr<double>(queue, nullptr).SetUp(); - Xspr<float>(queue, nullptr).SetUp(); Xspr<double>(queue, nullptr).SetUp(); - Xsyr2<float>(queue, nullptr).SetUp(); Xsyr2<double>(queue, nullptr).SetUp(); - Xspr2<float>(queue, nullptr).SetUp(); Xspr2<double>(queue, nullptr).SetUp(); + Xgemv<float>(queue, nullptr); Xgemv<double>(queue, nullptr); Xgemv<float2>(queue, nullptr); Xgemv<double2>(queue, nullptr); + Xgbmv<float>(queue, nullptr); Xgbmv<double>(queue, nullptr); Xgbmv<float2>(queue, nullptr); Xgbmv<double2>(queue, nullptr); + Xhemv<float2>(queue, nullptr); Xhemv<double2>(queue, nullptr); + Xhbmv<float2>(queue, nullptr); Xhbmv<double2>(queue, nullptr); + Xhpmv<float2>(queue, nullptr); Xhpmv<double2>(queue, nullptr); + Xsymv<float>(queue, nullptr); Xsymv<double>(queue, nullptr); + Xsbmv<float>(queue, nullptr); Xsbmv<double>(queue, nullptr); + Xspmv<float>(queue, nullptr); Xspmv<double>(queue, nullptr); + Xtrmv<float>(queue, nullptr); Xtrmv<double>(queue, nullptr); Xtrmv<float2>(queue, nullptr); Xtrmv<double2>(queue, nullptr); + Xtbmv<float>(queue, nullptr); Xtbmv<double>(queue, nullptr); Xtbmv<float2>(queue, nullptr); Xtbmv<double2>(queue, nullptr); + Xtpmv<float>(queue, nullptr); Xtpmv<double>(queue, nullptr); Xtpmv<float2>(queue, nullptr); Xtpmv<double2>(queue, nullptr); + Xger<float>(queue, nullptr); Xger<double>(queue, nullptr); + Xgeru<float2>(queue, nullptr); Xgeru<double2>(queue, nullptr); + Xgerc<float2>(queue, nullptr); Xgerc<double2>(queue, nullptr); + Xher<float2,float>(queue, nullptr); 
Xher<double2,double>(queue, nullptr); + Xhpr<float2,float>(queue, nullptr); Xhpr<double2,double>(queue, nullptr); + Xher2<float2>(queue, nullptr); Xher2<double2>(queue, nullptr); + Xhpr2<float2>(queue, nullptr); Xhpr2<double2>(queue, nullptr); + Xsyr<float>(queue, nullptr); Xsyr<double>(queue, nullptr); + Xspr<float>(queue, nullptr); Xspr<double>(queue, nullptr); + Xsyr2<float>(queue, nullptr); Xsyr2<double>(queue, nullptr); + Xspr2<float>(queue, nullptr); Xspr2<double>(queue, nullptr); // Runs all the level 3 set-up functions - Xgemm<float>(queue, nullptr).SetUp(); Xgemm<double>(queue, nullptr).SetUp(); Xgemm<float2>(queue, nullptr).SetUp(); Xgemm<double2>(queue, nullptr).SetUp(); - Xsymm<float>(queue, nullptr).SetUp(); Xsymm<double>(queue, nullptr).SetUp(); Xsymm<float2>(queue, nullptr).SetUp(); Xsymm<double2>(queue, nullptr).SetUp(); - Xhemm<float2>(queue, nullptr).SetUp(); Xhemm<double2>(queue, nullptr).SetUp(); - Xsyrk<float>(queue, nullptr).SetUp(); Xsyrk<double>(queue, nullptr).SetUp(); Xsyrk<float2>(queue, nullptr).SetUp(); Xsyrk<double2>(queue, nullptr).SetUp(); - Xherk<float2,float>(queue, nullptr).SetUp(); Xherk<double2,double>(queue, nullptr).SetUp(); - Xsyr2k<float>(queue, nullptr).SetUp(); Xsyr2k<double>(queue, nullptr).SetUp(); Xsyr2k<float2>(queue, nullptr).SetUp(); Xsyr2k<double2>(queue, nullptr).SetUp(); - Xher2k<float2,float>(queue, nullptr).SetUp(); Xher2k<double2,double>(queue, nullptr).SetUp(); - Xtrmm<float>(queue, nullptr).SetUp(); Xtrmm<double>(queue, nullptr).SetUp(); Xtrmm<float2>(queue, nullptr).SetUp(); Xtrmm<double2>(queue, nullptr).SetUp(); + Xgemm<float>(queue, nullptr); Xgemm<double>(queue, nullptr); Xgemm<float2>(queue, nullptr); Xgemm<double2>(queue, nullptr); + Xsymm<float>(queue, nullptr); Xsymm<double>(queue, nullptr); Xsymm<float2>(queue, nullptr); Xsymm<double2>(queue, nullptr); + Xhemm<float2>(queue, nullptr); Xhemm<double2>(queue, nullptr); + Xsyrk<float>(queue, nullptr); Xsyrk<double>(queue, nullptr); Xsyrk<float2>(queue, 
nullptr); Xsyrk<double2>(queue, nullptr); + Xherk<float2,float>(queue, nullptr); Xherk<double2,double>(queue, nullptr); + Xsyr2k<float>(queue, nullptr); Xsyr2k<double>(queue, nullptr); Xsyr2k<float2>(queue, nullptr); Xsyr2k<double2>(queue, nullptr); + Xher2k<float2,float>(queue, nullptr); Xher2k<double2,double>(queue, nullptr); + Xtrmm<float>(queue, nullptr); Xtrmm<double>(queue, nullptr); Xtrmm<float2>(queue, nullptr); Xtrmm<double2>(queue, nullptr); // Runs all the level 3 set-up functions - Xomatcopy<float>(queue, nullptr).SetUp(); Xomatcopy<double>(queue, nullptr).SetUp(); Xomatcopy<float2>(queue, nullptr).SetUp(); Xomatcopy<double2>(queue, nullptr).SetUp(); + Xomatcopy<float>(queue, nullptr); Xomatcopy<double>(queue, nullptr); Xomatcopy<float2>(queue, nullptr); Xomatcopy<double2>(queue, nullptr); } catch (...) { return DispatchException(); } return StatusCode::kSuccess; diff --git a/src/routine.cpp b/src/routine.cpp index 5e3a9dfe..acafb0d2 100644 --- a/src/routine.cpp +++ b/src/routine.cpp @@ -21,10 +21,11 @@ namespace clblast { // ================================================================================================= -// Constructor: not much here, because no status codes can be returned +// The constructor does all heavy work, errors are returned as exceptions Routine::Routine(Queue &queue, EventPointer event, const std::string &name, const std::vector<std::string> &routines, const Precision precision, - const std::vector<const Database::DatabaseEntry*> &userDatabase): + const std::vector<const Database::DatabaseEntry*> &userDatabase, + std::initializer_list<const char *> source): precision_(precision), routine_name_(name), queue_(queue), @@ -33,12 +34,6 @@ Routine::Routine(Queue &queue, EventPointer event, const std::string &name, device_(queue_.GetDevice()), device_name_(device_.Name()), db_(queue_, routines, precision_, userDatabase) { -} - -// ================================================================================================= - 
-// Separate set-up function to allow for status codes to be returned -void Routine::SetUp() { // Queries the cache to see whether or not the program (context-specific) is already there if (ProgramIsInCache(context_, precision_, routine_name_)) { return; } @@ -77,37 +72,39 @@ void Routine::SetUp() { } } - // Loads the common header (typedefs and defines and such) - std::string common_header = - #include "kernels/common.opencl" - ; - // Collects the parameters for this device in the form of defines, and adds the precision - auto defines = db_.GetDefines(); - defines += "#define PRECISION "+ToString(static_cast<int>(precision_))+"\n"; + auto source_string = db_.GetDefines(); + source_string += "#define PRECISION "+ToString(static_cast<int>(precision_))+"\n"; // Adds the name of the routine as a define - defines += "#define ROUTINE_"+routine_name_+"\n"; + source_string += "#define ROUTINE_"+routine_name_+"\n"; // For specific devices, use the non-IEE754 compilant OpenCL mad() instruction. This can improve // performance, but might result in a reduced accuracy. if (device_.IsAMD() && device_.IsGPU()) { - defines += "#define USE_CL_MAD 1\n"; + source_string += "#define USE_CL_MAD 1\n"; } // For specific devices, use staggered/shuffled workgroup indices. 
if (device_.IsAMD() && device_.IsGPU()) { - defines += "#define USE_STAGGERED_INDICES 1\n"; + source_string += "#define USE_STAGGERED_INDICES 1\n"; } // For specific devices add a global synchronisation barrier to the GEMM kernel to optimize // performance through better cache behaviour if (device_.IsARM() && device_.IsGPU()) { - defines += "#define GLOBAL_MEM_FENCE 1\n"; + source_string += "#define GLOBAL_MEM_FENCE 1\n"; } - // Combines everything together into a single source string - const auto source_string = defines + common_header + source_string_; + // Loads the common header (typedefs and defines and such) + source_string += + #include "kernels/common.opencl" + ; + + // Adds routine-specific code to the constructed source string + for (const char *s: source) { + source_string += s; + } // Prints details of the routine to compile in case of debugging in verbose mode #ifdef VERBOSE diff --git a/src/routine.hpp b/src/routine.hpp index c2bcdaff..f4ad435e 100644 --- a/src/routine.hpp +++ b/src/routine.hpp @@ -34,21 +34,19 @@ class Routine { // Base class constructor. The user database is an optional extra database to override the // built-in database. + // All heavy preparation work is done inside this constructor. 
explicit Routine(Queue &queue, EventPointer event, const std::string &name, const std::vector<std::string> &routines, const Precision precision, - const std::vector<const Database::DatabaseEntry*> &userDatabase = {}); - - // Set-up phase of the kernel - void SetUp(); + const std::vector<const Database::DatabaseEntry*> &userDatabase, + std::initializer_list<const char *> source); protected: // Non-static variable for the precision const Precision precision_; - // The routine's name and its kernel-source in string form + // The routine's name const std::string routine_name_; - std::string source_string_; // The OpenCL objects, accessible only from derived classes Queue queue_; diff --git a/src/routines/level1/xamax.cpp b/src/routines/level1/xamax.cpp index 8307188b..e9efa1a7 100644 --- a/src/routines/level1/xamax.cpp +++ b/src/routines/level1/xamax.cpp @@ -22,10 +22,9 @@ namespace clblast { // Constructor: forwards to base class constructor template <typename T> Xamax<T>::Xamax(Queue &queue, EventPointer event, const std::string &name): - Routine(queue, event, name, {"Xdot"}, PrecisionValue<T>()) { - source_string_ = + Routine(queue, event, name, {"Xdot"}, PrecisionValue<T>(), {}, { #include "../../kernels/level1/xamax.opencl" - ; + }) { } // ================================================================================================= diff --git a/src/routines/level1/xasum.cpp b/src/routines/level1/xasum.cpp index 9dde7a87..a242a5fa 100644 --- a/src/routines/level1/xasum.cpp +++ b/src/routines/level1/xasum.cpp @@ -22,10 +22,9 @@ namespace clblast { // Constructor: forwards to base class constructor template <typename T> Xasum<T>::Xasum(Queue &queue, EventPointer event, const std::string &name): - Routine(queue, event, name, {"Xdot"}, PrecisionValue<T>()) { - source_string_ = + Routine(queue, event, name, {"Xdot"}, PrecisionValue<T>(), {}, { #include "../../kernels/level1/xasum.opencl" - ; + }) { } // 
================================================================================================= diff --git a/src/routines/level1/xaxpy.cpp b/src/routines/level1/xaxpy.cpp index cbcbb3cd..5436c5b7 100644 --- a/src/routines/level1/xaxpy.cpp +++ b/src/routines/level1/xaxpy.cpp @@ -22,11 +22,10 @@ namespace clblast { // Constructor: forwards to base class constructor template <typename T> Xaxpy<T>::Xaxpy(Queue &queue, EventPointer event, const std::string &name): - Routine(queue, event, name, {"Xaxpy"}, PrecisionValue<T>()) { - source_string_ = + Routine(queue, event, name, {"Xaxpy"}, PrecisionValue<T>(), {}, { #include "../../kernels/level1/level1.opencl" #include "../../kernels/level1/xaxpy.opencl" - ; + }) { } // ================================================================================================= diff --git a/src/routines/level1/xcopy.cpp b/src/routines/level1/xcopy.cpp index 3bfbada6..d86200c0 100644 --- a/src/routines/level1/xcopy.cpp +++ b/src/routines/level1/xcopy.cpp @@ -22,11 +22,10 @@ namespace clblast { // Constructor: forwards to base class constructor template <typename T> Xcopy<T>::Xcopy(Queue &queue, EventPointer event, const std::string &name): - Routine(queue, event, name, {"Xaxpy"}, PrecisionValue<T>()) { - source_string_ = + Routine(queue, event, name, {"Xaxpy"}, PrecisionValue<T>(), {}, { #include "../../kernels/level1/level1.opencl" #include "../../kernels/level1/xcopy.opencl" - ; + }) { } // ================================================================================================= diff --git a/src/routines/level1/xdot.cpp b/src/routines/level1/xdot.cpp index e0d297f8..9d718913 100644 --- a/src/routines/level1/xdot.cpp +++ b/src/routines/level1/xdot.cpp @@ -22,10 +22,9 @@ namespace clblast { // Constructor: forwards to base class constructor template <typename T> Xdot<T>::Xdot(Queue &queue, EventPointer event, const std::string &name): - Routine(queue, event, name, {"Xdot"}, PrecisionValue<T>()) { - source_string_ = + 
Routine(queue, event, name, {"Xdot"}, PrecisionValue<T>(), {}, { #include "../../kernels/level1/xdot.opencl" - ; + }) { } // ================================================================================================= diff --git a/src/routines/level1/xnrm2.cpp b/src/routines/level1/xnrm2.cpp index eb795498..373820a4 100644 --- a/src/routines/level1/xnrm2.cpp +++ b/src/routines/level1/xnrm2.cpp @@ -22,10 +22,9 @@ namespace clblast { // Constructor: forwards to base class constructor template <typename T> Xnrm2<T>::Xnrm2(Queue &queue, EventPointer event, const std::string &name): - Routine(queue, event, name, {"Xdot"}, PrecisionValue<T>()) { - source_string_ = + Routine(queue, event, name, {"Xdot"}, PrecisionValue<T>(), {}, { #include "../../kernels/level1/xnrm2.opencl" - ; + }) { } // ================================================================================================= diff --git a/src/routines/level1/xscal.cpp b/src/routines/level1/xscal.cpp index ed126879..17410f01 100644 --- a/src/routines/level1/xscal.cpp +++ b/src/routines/level1/xscal.cpp @@ -22,11 +22,10 @@ namespace clblast { // Constructor: forwards to base class constructor template <typename T> Xscal<T>::Xscal(Queue &queue, EventPointer event, const std::string &name): - Routine(queue, event, name, {"Xaxpy"}, PrecisionValue<T>()) { - source_string_ = + Routine(queue, event, name, {"Xaxpy"}, PrecisionValue<T>(), {}, { #include "../../kernels/level1/level1.opencl" #include "../../kernels/level1/xscal.opencl" - ; + }) { } // ================================================================================================= diff --git a/src/routines/level1/xswap.cpp b/src/routines/level1/xswap.cpp index 2f2c0370..c9b97dc9 100644 --- a/src/routines/level1/xswap.cpp +++ b/src/routines/level1/xswap.cpp @@ -22,11 +22,10 @@ namespace clblast { // Constructor: forwards to base class constructor template <typename T> Xswap<T>::Xswap(Queue &queue, EventPointer event, const std::string &name): - 
Routine(queue, event, name, {"Xaxpy"}, PrecisionValue<T>()) { - source_string_ = + Routine(queue, event, name, {"Xaxpy"}, PrecisionValue<T>(), {}, { #include "../../kernels/level1/level1.opencl" #include "../../kernels/level1/xswap.opencl" - ; + }) { } // ================================================================================================= diff --git a/src/routines/level2/xgemv.cpp b/src/routines/level2/xgemv.cpp index 97dcd8ef..7b4c2e8f 100644 --- a/src/routines/level2/xgemv.cpp +++ b/src/routines/level2/xgemv.cpp @@ -22,11 +22,10 @@ namespace clblast { // Constructor: forwards to base class constructor template <typename T> Xgemv<T>::Xgemv(Queue &queue, EventPointer event, const std::string &name): - Routine(queue, event, name, {"Pad", "Xgemv", "XgemvFast", "XgemvFastRot"}, PrecisionValue<T>()) { - source_string_ = + Routine(queue, event, name, {"Pad", "Xgemv", "XgemvFast", "XgemvFastRot"}, PrecisionValue<T>(), {}, { #include "../../kernels/level2/xgemv.opencl" #include "../../kernels/level2/xgemv_fast.opencl" - ; + }) { } // ================================================================================================= diff --git a/src/routines/level2/xger.cpp b/src/routines/level2/xger.cpp index f22131bb..d16ebd11 100644 --- a/src/routines/level2/xger.cpp +++ b/src/routines/level2/xger.cpp @@ -22,11 +22,10 @@ namespace clblast { // Constructor: forwards to base class constructor template <typename T> Xger<T>::Xger(Queue &queue, EventPointer event, const std::string &name): - Routine(queue, event, name, {"Xger"}, PrecisionValue<T>()) { - source_string_ = + Routine(queue, event, name, {"Xger"}, PrecisionValue<T>(), {}, { #include "../../kernels/level2/level2.opencl" #include "../../kernels/level2/xger.opencl" - ; + }) { } // ================================================================================================= diff --git a/src/routines/level2/xher.cpp b/src/routines/level2/xher.cpp index 4cf27702..6c334e63 100644 --- 
a/src/routines/level2/xher.cpp +++ b/src/routines/level2/xher.cpp @@ -21,11 +21,10 @@ namespace clblast { // Constructor: forwards to base class constructor template <typename T, typename U> Xher<T,U>::Xher(Queue &queue, EventPointer event, const std::string &name): - Routine(queue, event, name, {"Xger"}, PrecisionValue<T>()) { - source_string_ = + Routine(queue, event, name, {"Xger"}, PrecisionValue<T>(), {}, { #include "../../kernels/level2/level2.opencl" #include "../../kernels/level2/xher.opencl" - ; + }) { } // ================================================================================================= diff --git a/src/routines/level2/xher2.cpp b/src/routines/level2/xher2.cpp index c93585de..11e2c871 100644 --- a/src/routines/level2/xher2.cpp +++ b/src/routines/level2/xher2.cpp @@ -21,11 +21,10 @@ namespace clblast { // Constructor: forwards to base class constructor template <typename T> Xher2<T>::Xher2(Queue &queue, EventPointer event, const std::string &name): - Routine(queue, event, name, {"Xger"}, PrecisionValue<T>()) { - source_string_ = + Routine(queue, event, name, {"Xger"}, PrecisionValue<T>(), {}, { #include "../../kernels/level2/level2.opencl" #include "../../kernels/level2/xher2.opencl" - ; + }) { } // ================================================================================================= diff --git a/src/routines/level3/xgemm.cpp b/src/routines/level3/xgemm.cpp index a6f7c286..4f70dc7a 100644 --- a/src/routines/level3/xgemm.cpp +++ b/src/routines/level3/xgemm.cpp @@ -24,8 +24,7 @@ template <typename T> Xgemm<T>::Xgemm(Queue &queue, EventPointer event, const std::string &name): Routine(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm","XgemmDirect","KernelSelection"}, - PrecisionValue<T>()) { - source_string_ = + PrecisionValue<T>(), {}, { #include "../../kernels/level3/level3.opencl" #include "../../kernels/level3/copy_fast.opencl" #include "../../kernels/level3/copy_pad.opencl" @@ -37,13 +36,11 @@ 
Xgemm<T>::Xgemm(Queue &queue, EventPointer event, const std::string &name): #include "../../kernels/level3/xgemm_direct_part1.opencl" #include "../../kernels/level3/xgemm_direct_part2.opencl" #include "../../kernels/level3/xgemm_direct_part3.opencl" - ; - auto source_string_part_2 = // separated in two parts to prevent C1091 in MSVC 2013 + , // separated in two parts to prevent C1091 in MSVC 2013 #include "../../kernels/level3/xgemm_part1.opencl" #include "../../kernels/level3/xgemm_part2.opencl" #include "../../kernels/level3/xgemm_part3.opencl" - ; - source_string_ += source_string_part_2; + }) { } // ================================================================================================= diff --git a/src/routines/level3/xher2k.cpp b/src/routines/level3/xher2k.cpp index a326dfbe..7244c848 100644 --- a/src/routines/level3/xher2k.cpp +++ b/src/routines/level3/xher2k.cpp @@ -22,8 +22,7 @@ namespace clblast { // Constructor: forwards to base class constructor template <typename T, typename U> Xher2k<T,U>::Xher2k(Queue &queue, EventPointer event, const std::string &name): - Routine(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, PrecisionValue<T>()) { - source_string_ = + Routine(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, PrecisionValue<T>(), {}, { #include "../../kernels/level3/level3.opencl" #include "../../kernels/level3/copy_fast.opencl" #include "../../kernels/level3/copy_pad.opencl" @@ -32,7 +31,7 @@ Xher2k<T,U>::Xher2k(Queue &queue, EventPointer event, const std::string &name): #include "../../kernels/level3/xgemm_part1.opencl" #include "../../kernels/level3/xgemm_part2.opencl" #include "../../kernels/level3/xgemm_part3.opencl" - ; + }) { } // ================================================================================================= diff --git a/src/routines/level3/xherk.cpp b/src/routines/level3/xherk.cpp index 6e36714e..865c6c37 100644 --- a/src/routines/level3/xherk.cpp +++ 
b/src/routines/level3/xherk.cpp @@ -22,8 +22,7 @@ namespace clblast { // Constructor: forwards to base class constructor template <typename T, typename U> Xherk<T,U>::Xherk(Queue &queue, EventPointer event, const std::string &name): - Routine(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, PrecisionValue<T>()) { - source_string_ = + Routine(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, PrecisionValue<T>(), {}, { #include "../../kernels/level3/level3.opencl" #include "../../kernels/level3/copy_fast.opencl" #include "../../kernels/level3/copy_pad.opencl" @@ -32,7 +31,7 @@ Xherk<T,U>::Xherk(Queue &queue, EventPointer event, const std::string &name): #include "../../kernels/level3/xgemm_part1.opencl" #include "../../kernels/level3/xgemm_part2.opencl" #include "../../kernels/level3/xgemm_part3.opencl" - ; + }) { } // ================================================================================================= diff --git a/src/routines/level3/xsyr2k.cpp b/src/routines/level3/xsyr2k.cpp index b10ee586..826854a8 100644 --- a/src/routines/level3/xsyr2k.cpp +++ b/src/routines/level3/xsyr2k.cpp @@ -22,8 +22,7 @@ namespace clblast { // Constructor: forwards to base class constructor template <typename T> Xsyr2k<T>::Xsyr2k(Queue &queue, EventPointer event, const std::string &name): - Routine(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, PrecisionValue<T>()) { - source_string_ = + Routine(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, PrecisionValue<T>(), {}, { #include "../../kernels/level3/level3.opencl" #include "../../kernels/level3/copy_fast.opencl" #include "../../kernels/level3/copy_pad.opencl" @@ -32,7 +31,7 @@ Xsyr2k<T>::Xsyr2k(Queue &queue, EventPointer event, const std::string &name): #include "../../kernels/level3/xgemm_part1.opencl" #include "../../kernels/level3/xgemm_part2.opencl" #include "../../kernels/level3/xgemm_part3.opencl" - ; + }) { } // 
================================================================================================= diff --git a/src/routines/level3/xsyrk.cpp b/src/routines/level3/xsyrk.cpp index 93fd4666..9aa8ca2d 100644 --- a/src/routines/level3/xsyrk.cpp +++ b/src/routines/level3/xsyrk.cpp @@ -22,8 +22,7 @@ namespace clblast { // Constructor: forwards to base class constructor template <typename T> Xsyrk<T>::Xsyrk(Queue &queue, EventPointer event, const std::string &name): - Routine(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, PrecisionValue<T>()) { - source_string_ = + Routine(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, PrecisionValue<T>(), {}, { #include "../../kernels/level3/level3.opencl" #include "../../kernels/level3/copy_fast.opencl" #include "../../kernels/level3/copy_pad.opencl" @@ -32,7 +31,7 @@ Xsyrk<T>::Xsyrk(Queue &queue, EventPointer event, const std::string &name): #include "../../kernels/level3/xgemm_part1.opencl" #include "../../kernels/level3/xgemm_part2.opencl" #include "../../kernels/level3/xgemm_part3.opencl" - ; + }) { } // ================================================================================================= diff --git a/src/routines/levelx/xomatcopy.cpp b/src/routines/levelx/xomatcopy.cpp index e053c076..875ca7d2 100644 --- a/src/routines/levelx/xomatcopy.cpp +++ b/src/routines/levelx/xomatcopy.cpp @@ -22,14 +22,13 @@ namespace clblast { // Constructor: forwards to base class constructor template <typename T> Xomatcopy<T>::Xomatcopy(Queue &queue, EventPointer event, const std::string &name): - Routine(queue, event, name, {"Copy","Pad","Transpose","Padtranspose"}, PrecisionValue<T>()) { - source_string_ = + Routine(queue, event, name, {"Copy","Pad","Transpose","Padtranspose"}, PrecisionValue<T>(), {}, { #include "../../kernels/level3/level3.opencl" #include "../../kernels/level3/copy_fast.opencl" #include "../../kernels/level3/copy_pad.opencl" #include 
"../../kernels/level3/transpose_fast.opencl" #include "../../kernels/level3/transpose_pad.opencl" - ; + }) { } // ================================================================================================= |