summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorIvan Shapovalov <intelfx@intelfx.name>2016-10-18 04:53:06 +0300
committerIvan Shapovalov <intelfx@intelfx.name>2016-10-22 08:45:27 +0300
commit56f300607b1d0b81ab3269894fda5a066c46cdeb (patch)
tree60b4c5566cc4bbfad15a7791a4e20c2a60e16707 /src
parentb98af44fcf89b9946e1de438b1f5527e6bf28905 (diff)
Routine: get rid of ::SetUp()
Since we now use C++ exceptions inside the implementation (and exceptions can be thrown from constructors), there is no need for a separate Routine::SetUp() function. To make this possible, we also change how the kernel source string is constructed: the kernel-specific source code is now passed to the Routine constructor via an initializer_list of C strings, which avoids unnecessary data copying while also working around MSVC 2013 compiler error C1091 (string literal exceeds the maximum length).
Diffstat (limited to 'src')
-rw-r--r--src/clblast.cpp134
-rw-r--r--src/routine.cpp39
-rw-r--r--src/routine.hpp10
-rw-r--r--src/routines/level1/xamax.cpp5
-rw-r--r--src/routines/level1/xasum.cpp5
-rw-r--r--src/routines/level1/xaxpy.cpp5
-rw-r--r--src/routines/level1/xcopy.cpp5
-rw-r--r--src/routines/level1/xdot.cpp5
-rw-r--r--src/routines/level1/xnrm2.cpp5
-rw-r--r--src/routines/level1/xscal.cpp5
-rw-r--r--src/routines/level1/xswap.cpp5
-rw-r--r--src/routines/level2/xgemv.cpp5
-rw-r--r--src/routines/level2/xger.cpp5
-rw-r--r--src/routines/level2/xher.cpp5
-rw-r--r--src/routines/level2/xher2.cpp5
-rw-r--r--src/routines/level3/xgemm.cpp9
-rw-r--r--src/routines/level3/xher2k.cpp5
-rw-r--r--src/routines/level3/xherk.cpp5
-rw-r--r--src/routines/level3/xsyr2k.cpp5
-rw-r--r--src/routines/level3/xsyrk.cpp5
-rw-r--r--src/routines/levelx/xomatcopy.cpp5
21 files changed, 104 insertions, 173 deletions
diff --git a/src/clblast.cpp b/src/clblast.cpp
index 8e7b042f..4bb4e0b3 100644
--- a/src/clblast.cpp
+++ b/src/clblast.cpp
@@ -171,7 +171,6 @@ StatusCode Swap(const size_t n,
try {
auto queue_cpp = Queue(*queue);
auto routine = Xswap<T>(queue_cpp, event);
- routine.SetUp();
routine.DoSwap(n,
Buffer<T>(x_buffer), x_offset, x_inc,
Buffer<T>(y_buffer), y_offset, y_inc);
@@ -208,7 +207,6 @@ StatusCode Scal(const size_t n,
try {
auto queue_cpp = Queue(*queue);
auto routine = Xscal<T>(queue_cpp, event);
- routine.SetUp();
routine.DoScal(n,
alpha,
Buffer<T>(x_buffer), x_offset, x_inc);
@@ -245,7 +243,6 @@ StatusCode Copy(const size_t n,
try {
auto queue_cpp = Queue(*queue);
auto routine = Xcopy<T>(queue_cpp, event);
- routine.SetUp();
routine.DoCopy(n,
Buffer<T>(x_buffer), x_offset, x_inc,
Buffer<T>(y_buffer), y_offset, y_inc);
@@ -283,7 +280,6 @@ StatusCode Axpy(const size_t n,
try {
auto queue_cpp = Queue(*queue);
auto routine = Xaxpy<T>(queue_cpp, event);
- routine.SetUp();
routine.DoAxpy(n,
alpha,
Buffer<T>(x_buffer), x_offset, x_inc,
@@ -327,7 +323,6 @@ StatusCode Dot(const size_t n,
try {
auto queue_cpp = Queue(*queue);
auto routine = Xdot<T>(queue_cpp, event);
- routine.SetUp();
routine.DoDot(n,
Buffer<T>(dot_buffer), dot_offset,
Buffer<T>(x_buffer), x_offset, x_inc,
@@ -361,7 +356,6 @@ StatusCode Dotu(const size_t n,
try {
auto queue_cpp = Queue(*queue);
auto routine = Xdotu<T>(queue_cpp, event);
- routine.SetUp();
routine.DoDotu(n,
Buffer<T>(dot_buffer), dot_offset,
Buffer<T>(x_buffer), x_offset, x_inc,
@@ -390,7 +384,6 @@ StatusCode Dotc(const size_t n,
try {
auto queue_cpp = Queue(*queue);
auto routine = Xdotc<T>(queue_cpp, event);
- routine.SetUp();
routine.DoDotc(n,
Buffer<T>(dot_buffer), dot_offset,
Buffer<T>(x_buffer), x_offset, x_inc,
@@ -418,7 +411,6 @@ StatusCode Nrm2(const size_t n,
try {
auto queue_cpp = Queue(*queue);
auto routine = Xnrm2<T>(queue_cpp, event);
- routine.SetUp();
routine.DoNrm2(n,
Buffer<T>(nrm2_buffer), nrm2_offset,
Buffer<T>(x_buffer), x_offset, x_inc);
@@ -455,7 +447,6 @@ StatusCode Asum(const size_t n,
try {
auto queue_cpp = Queue(*queue);
auto routine = Xasum<T>(queue_cpp, event);
- routine.SetUp();
routine.DoAsum(n,
Buffer<T>(asum_buffer), asum_offset,
Buffer<T>(x_buffer), x_offset, x_inc);
@@ -492,7 +483,6 @@ StatusCode Sum(const size_t n,
try {
auto queue_cpp = Queue(*queue);
auto routine = Xsum<T>(queue_cpp, event);
- routine.SetUp();
routine.DoSum(n,
Buffer<T>(sum_buffer), sum_offset,
Buffer<T>(x_buffer), x_offset, x_inc);
@@ -529,7 +519,6 @@ StatusCode Amax(const size_t n,
try {
auto queue_cpp = Queue(*queue);
auto routine = Xamax<T>(queue_cpp, event);
- routine.SetUp();
routine.DoAmax(n,
Buffer<unsigned int>(imax_buffer), imax_offset,
Buffer<T>(x_buffer), x_offset, x_inc);
@@ -566,7 +555,6 @@ StatusCode Max(const size_t n,
try {
auto queue_cpp = Queue(*queue);
auto routine = Xmax<T>(queue_cpp, event);
- routine.SetUp();
routine.DoMax(n,
Buffer<unsigned int>(imax_buffer), imax_offset,
Buffer<T>(x_buffer), x_offset, x_inc);
@@ -603,7 +591,6 @@ StatusCode Min(const size_t n,
try {
auto queue_cpp = Queue(*queue);
auto routine = Xmin<T>(queue_cpp, event);
- routine.SetUp();
routine.DoMin(n,
Buffer<unsigned int>(imin_buffer), imin_offset,
Buffer<T>(x_buffer), x_offset, x_inc);
@@ -648,7 +635,6 @@ StatusCode Gemv(const Layout layout, const Transpose a_transpose,
try {
auto queue_cpp = Queue(*queue);
auto routine = Xgemv<T>(queue_cpp, event);
- routine.SetUp();
routine.DoGemv(layout, a_transpose,
m, n,
alpha,
@@ -713,7 +699,6 @@ StatusCode Gbmv(const Layout layout, const Transpose a_transpose,
try {
auto queue_cpp = Queue(*queue);
auto routine = Xgbmv<T>(queue_cpp, event);
- routine.SetUp();
routine.DoGbmv(layout, a_transpose,
m, n, kl, ku,
alpha,
@@ -778,7 +763,6 @@ StatusCode Hemv(const Layout layout, const Triangle triangle,
try {
auto queue_cpp = Queue(*queue);
auto routine = Xhemv<T>(queue_cpp, event);
- routine.SetUp();
routine.DoHemv(layout, triangle,
n,
alpha,
@@ -819,7 +803,6 @@ StatusCode Hbmv(const Layout layout, const Triangle triangle,
try {
auto queue_cpp = Queue(*queue);
auto routine = Xhbmv<T>(queue_cpp, event);
- routine.SetUp();
routine.DoHbmv(layout, triangle,
n, k,
alpha,
@@ -860,7 +843,6 @@ StatusCode Hpmv(const Layout layout, const Triangle triangle,
try {
auto queue_cpp = Queue(*queue);
auto routine = Xhpmv<T>(queue_cpp, event);
- routine.SetUp();
routine.DoHpmv(layout, triangle,
n,
alpha,
@@ -901,7 +883,6 @@ StatusCode Symv(const Layout layout, const Triangle triangle,
try {
auto queue_cpp = Queue(*queue);
auto routine = Xsymv<T>(queue_cpp, event);
- routine.SetUp();
routine.DoSymv(layout, triangle,
n,
alpha,
@@ -950,7 +931,6 @@ StatusCode Sbmv(const Layout layout, const Triangle triangle,
try {
auto queue_cpp = Queue(*queue);
auto routine = Xsbmv<T>(queue_cpp, event);
- routine.SetUp();
routine.DoSbmv(layout, triangle,
n, k,
alpha,
@@ -999,7 +979,6 @@ StatusCode Spmv(const Layout layout, const Triangle triangle,
try {
auto queue_cpp = Queue(*queue);
auto routine = Xspmv<T>(queue_cpp, event);
- routine.SetUp();
routine.DoSpmv(layout, triangle,
n,
alpha,
@@ -1045,7 +1024,6 @@ StatusCode Trmv(const Layout layout, const Triangle triangle, const Transpose a_
try {
auto queue_cpp = Queue(*queue);
auto routine = Xtrmv<T>(queue_cpp, event);
- routine.SetUp();
routine.DoTrmv(layout, triangle, a_transpose, diagonal,
n,
Buffer<T>(a_buffer), a_offset, a_ld,
@@ -1089,7 +1067,6 @@ StatusCode Tbmv(const Layout layout, const Triangle triangle, const Transpose a_
try {
auto queue_cpp = Queue(*queue);
auto routine = Xtbmv<T>(queue_cpp, event);
- routine.SetUp();
routine.DoTbmv(layout, triangle, a_transpose, diagonal,
n, k,
Buffer<T>(a_buffer), a_offset, a_ld,
@@ -1133,7 +1110,6 @@ StatusCode Tpmv(const Layout layout, const Triangle triangle, const Transpose a_
try {
auto queue_cpp = Queue(*queue);
auto routine = Xtpmv<T>(queue_cpp, event);
- routine.SetUp();
routine.DoTpmv(layout, triangle, a_transpose, diagonal,
n,
Buffer<T>(ap_buffer), ap_offset,
@@ -1269,7 +1245,6 @@ StatusCode Ger(const Layout layout,
try {
auto queue_cpp = Queue(*queue);
auto routine = Xger<T>(queue_cpp, event);
- routine.SetUp();
routine.DoGer(layout,
m, n,
alpha,
@@ -1313,7 +1288,6 @@ StatusCode Geru(const Layout layout,
try {
auto queue_cpp = Queue(*queue);
auto routine = Xgeru<T>(queue_cpp, event);
- routine.SetUp();
routine.DoGeru(layout,
m, n,
alpha,
@@ -1350,7 +1324,6 @@ StatusCode Gerc(const Layout layout,
try {
auto queue_cpp = Queue(*queue);
auto routine = Xgerc<T>(queue_cpp, event);
- routine.SetUp();
routine.DoGerc(layout,
m, n,
alpha,
@@ -1386,7 +1359,6 @@ StatusCode Her(const Layout layout, const Triangle triangle,
try {
auto queue_cpp = Queue(*queue);
auto routine = Xher<std::complex<T>,T>(queue_cpp, event);
- routine.SetUp();
routine.DoHer(layout, triangle,
n,
alpha,
@@ -1419,7 +1391,6 @@ StatusCode Hpr(const Layout layout, const Triangle triangle,
try {
auto queue_cpp = Queue(*queue);
auto routine = Xhpr<std::complex<T>,T>(queue_cpp, event);
- routine.SetUp();
routine.DoHpr(layout, triangle,
n,
alpha,
@@ -1453,7 +1424,6 @@ StatusCode Her2(const Layout layout, const Triangle triangle,
try {
auto queue_cpp = Queue(*queue);
auto routine = Xher2<T>(queue_cpp, event);
- routine.SetUp();
routine.DoHer2(layout, triangle,
n,
alpha,
@@ -1490,7 +1460,6 @@ StatusCode Hpr2(const Layout layout, const Triangle triangle,
try {
auto queue_cpp = Queue(*queue);
auto routine = Xhpr2<T>(queue_cpp, event);
- routine.SetUp();
routine.DoHpr2(layout, triangle,
n,
alpha,
@@ -1526,7 +1495,6 @@ StatusCode Syr(const Layout layout, const Triangle triangle,
try {
auto queue_cpp = Queue(*queue);
auto routine = Xsyr<T>(queue_cpp, event);
- routine.SetUp();
routine.DoSyr(layout, triangle,
n,
alpha,
@@ -1565,7 +1533,6 @@ StatusCode Spr(const Layout layout, const Triangle triangle,
try {
auto queue_cpp = Queue(*queue);
auto routine = Xspr<T>(queue_cpp, event);
- routine.SetUp();
routine.DoSpr(layout, triangle,
n,
alpha,
@@ -1605,7 +1572,6 @@ StatusCode Syr2(const Layout layout, const Triangle triangle,
try {
auto queue_cpp = Queue(*queue);
auto routine = Xsyr2<T>(queue_cpp, event);
- routine.SetUp();
routine.DoSyr2(layout, triangle,
n,
alpha,
@@ -1649,7 +1615,6 @@ StatusCode Spr2(const Layout layout, const Triangle triangle,
try {
auto queue_cpp = Queue(*queue);
auto routine = Xspr2<T>(queue_cpp, event);
- routine.SetUp();
routine.DoSpr2(layout, triangle,
n,
alpha,
@@ -1698,7 +1663,6 @@ StatusCode Gemm(const Layout layout, const Transpose a_transpose, const Transpos
try {
auto queue_cpp = Queue(*queue);
auto routine = Xgemm<T>(queue_cpp, event);
- routine.SetUp();
routine.DoGemm(layout, a_transpose, b_transpose,
m, n, k,
alpha,
@@ -1763,7 +1727,6 @@ StatusCode Symm(const Layout layout, const Side side, const Triangle triangle,
try {
auto queue_cpp = Queue(*queue);
auto routine = Xsymm<T>(queue_cpp, event);
- routine.SetUp();
routine.DoSymm(layout, side, triangle,
m, n,
alpha,
@@ -1828,7 +1791,6 @@ StatusCode Hemm(const Layout layout, const Side side, const Triangle triangle,
try {
auto queue_cpp = Queue(*queue);
auto routine = Xhemm<T>(queue_cpp, event);
- routine.SetUp();
routine.DoHemm(layout, side, triangle,
m, n,
alpha,
@@ -1868,7 +1830,6 @@ StatusCode Syrk(const Layout layout, const Triangle triangle, const Transpose a_
try {
auto queue_cpp = Queue(*queue);
auto routine = Xsyrk<T>(queue_cpp, event);
- routine.SetUp();
routine.DoSyrk(layout, triangle, a_transpose,
n, k,
alpha,
@@ -1926,7 +1887,6 @@ StatusCode Herk(const Layout layout, const Triangle triangle, const Transpose a_
try {
auto queue_cpp = Queue(*queue);
auto routine = Xherk<std::complex<T>,T>(queue_cpp, event);
- routine.SetUp();
routine.DoHerk(layout, triangle, a_transpose,
n, k,
alpha,
@@ -1964,7 +1924,6 @@ StatusCode Syr2k(const Layout layout, const Triangle triangle, const Transpose a
try {
auto queue_cpp = Queue(*queue);
auto routine = Xsyr2k<T>(queue_cpp, event);
- routine.SetUp();
routine.DoSyr2k(layout, triangle, ab_transpose,
n, k,
alpha,
@@ -2029,7 +1988,6 @@ StatusCode Her2k(const Layout layout, const Triangle triangle, const Transpose a
try {
auto queue_cpp = Queue(*queue);
auto routine = Xher2k<T,U>(queue_cpp, event);
- routine.SetUp();
routine.DoHer2k(layout, triangle, ab_transpose,
n, k,
alpha,
@@ -2068,7 +2026,6 @@ StatusCode Trmm(const Layout layout, const Side side, const Triangle triangle, c
try {
auto queue_cpp = Queue(*queue);
auto routine = Xtrmm<T>(queue_cpp, event);
- routine.SetUp();
routine.DoTrmm(layout, side, triangle, a_transpose, diagonal,
m, n,
alpha,
@@ -2164,7 +2121,6 @@ StatusCode Omatcopy(const Layout layout, const Transpose a_transpose,
try {
auto queue_cpp = Queue(*queue);
auto routine = Xomatcopy<T>(queue_cpp, event);
- routine.SetUp();
routine.DoOmatcopy(layout, a_transpose,
m, n,
alpha,
@@ -2225,57 +2181,57 @@ StatusCode FillCache(const cl_device_id device) {
auto queue = Queue(context, device_cpp);
// Runs all the level 1 set-up functions
- Xswap<float>(queue, nullptr).SetUp(); Xswap<double>(queue, nullptr).SetUp(); Xswap<float2>(queue, nullptr).SetUp(); Xswap<double2>(queue, nullptr).SetUp();
- Xswap<float>(queue, nullptr).SetUp(); Xswap<double>(queue, nullptr).SetUp(); Xswap<float2>(queue, nullptr).SetUp(); Xswap<double2>(queue, nullptr).SetUp();
- Xscal<float>(queue, nullptr).SetUp(); Xscal<double>(queue, nullptr).SetUp(); Xscal<float2>(queue, nullptr).SetUp(); Xscal<double2>(queue, nullptr).SetUp();
- Xcopy<float>(queue, nullptr).SetUp(); Xcopy<double>(queue, nullptr).SetUp(); Xcopy<float2>(queue, nullptr).SetUp(); Xcopy<double2>(queue, nullptr).SetUp();
- Xaxpy<float>(queue, nullptr).SetUp(); Xaxpy<double>(queue, nullptr).SetUp(); Xaxpy<float2>(queue, nullptr).SetUp(); Xaxpy<double2>(queue, nullptr).SetUp();
- Xdot<float>(queue, nullptr).SetUp(); Xdot<double>(queue, nullptr).SetUp();
- Xdotu<float2>(queue, nullptr).SetUp(); Xdotu<double2>(queue, nullptr).SetUp();
- Xdotc<float2>(queue, nullptr).SetUp(); Xdotc<double2>(queue, nullptr).SetUp();
- Xnrm2<float>(queue, nullptr).SetUp(); Xnrm2<double>(queue, nullptr).SetUp(); Xnrm2<float2>(queue, nullptr).SetUp(); Xnrm2<double2>(queue, nullptr).SetUp();
- Xasum<float>(queue, nullptr).SetUp(); Xasum<double>(queue, nullptr).SetUp(); Xasum<float2>(queue, nullptr).SetUp(); Xasum<double2>(queue, nullptr).SetUp();
- Xsum<float>(queue, nullptr).SetUp(); Xsum<double>(queue, nullptr).SetUp(); Xsum<float2>(queue, nullptr).SetUp(); Xsum<double2>(queue, nullptr).SetUp();
- Xamax<float>(queue, nullptr).SetUp(); Xamax<double>(queue, nullptr).SetUp(); Xamax<float2>(queue, nullptr).SetUp(); Xamax<double2>(queue, nullptr).SetUp();
- Xmax<float>(queue, nullptr).SetUp(); Xmax<double>(queue, nullptr).SetUp(); Xmax<float2>(queue, nullptr).SetUp(); Xmax<double2>(queue, nullptr).SetUp();
- Xmin<float>(queue, nullptr).SetUp(); Xmin<double>(queue, nullptr).SetUp(); Xmin<float2>(queue, nullptr).SetUp(); Xmin<double2>(queue, nullptr).SetUp();
+ Xswap<float>(queue, nullptr); Xswap<double>(queue, nullptr); Xswap<float2>(queue, nullptr); Xswap<double2>(queue, nullptr);
+ Xswap<float>(queue, nullptr); Xswap<double>(queue, nullptr); Xswap<float2>(queue, nullptr); Xswap<double2>(queue, nullptr);
+ Xscal<float>(queue, nullptr); Xscal<double>(queue, nullptr); Xscal<float2>(queue, nullptr); Xscal<double2>(queue, nullptr);
+ Xcopy<float>(queue, nullptr); Xcopy<double>(queue, nullptr); Xcopy<float2>(queue, nullptr); Xcopy<double2>(queue, nullptr);
+ Xaxpy<float>(queue, nullptr); Xaxpy<double>(queue, nullptr); Xaxpy<float2>(queue, nullptr); Xaxpy<double2>(queue, nullptr);
+ Xdot<float>(queue, nullptr); Xdot<double>(queue, nullptr);
+ Xdotu<float2>(queue, nullptr); Xdotu<double2>(queue, nullptr);
+ Xdotc<float2>(queue, nullptr); Xdotc<double2>(queue, nullptr);
+ Xnrm2<float>(queue, nullptr); Xnrm2<double>(queue, nullptr); Xnrm2<float2>(queue, nullptr); Xnrm2<double2>(queue, nullptr);
+ Xasum<float>(queue, nullptr); Xasum<double>(queue, nullptr); Xasum<float2>(queue, nullptr); Xasum<double2>(queue, nullptr);
+ Xsum<float>(queue, nullptr); Xsum<double>(queue, nullptr); Xsum<float2>(queue, nullptr); Xsum<double2>(queue, nullptr);
+ Xamax<float>(queue, nullptr); Xamax<double>(queue, nullptr); Xamax<float2>(queue, nullptr); Xamax<double2>(queue, nullptr);
+ Xmax<float>(queue, nullptr); Xmax<double>(queue, nullptr); Xmax<float2>(queue, nullptr); Xmax<double2>(queue, nullptr);
+ Xmin<float>(queue, nullptr); Xmin<double>(queue, nullptr); Xmin<float2>(queue, nullptr); Xmin<double2>(queue, nullptr);
// Runs all the level 2 set-up functions
- Xgemv<float>(queue, nullptr).SetUp(); Xgemv<double>(queue, nullptr).SetUp(); Xgemv<float2>(queue, nullptr).SetUp(); Xgemv<double2>(queue, nullptr).SetUp();
- Xgbmv<float>(queue, nullptr).SetUp(); Xgbmv<double>(queue, nullptr).SetUp(); Xgbmv<float2>(queue, nullptr).SetUp(); Xgbmv<double2>(queue, nullptr).SetUp();
- Xhemv<float2>(queue, nullptr).SetUp(); Xhemv<double2>(queue, nullptr).SetUp();
- Xhbmv<float2>(queue, nullptr).SetUp(); Xhbmv<double2>(queue, nullptr).SetUp();
- Xhpmv<float2>(queue, nullptr).SetUp(); Xhpmv<double2>(queue, nullptr).SetUp();
- Xsymv<float>(queue, nullptr).SetUp(); Xsymv<double>(queue, nullptr).SetUp();
- Xsbmv<float>(queue, nullptr).SetUp(); Xsbmv<double>(queue, nullptr).SetUp();
- Xspmv<float>(queue, nullptr).SetUp(); Xspmv<double>(queue, nullptr).SetUp();
- Xtrmv<float>(queue, nullptr).SetUp(); Xtrmv<double>(queue, nullptr).SetUp(); Xtrmv<float2>(queue, nullptr).SetUp(); Xtrmv<double2>(queue, nullptr).SetUp();
- Xtbmv<float>(queue, nullptr).SetUp(); Xtbmv<double>(queue, nullptr).SetUp(); Xtbmv<float2>(queue, nullptr).SetUp(); Xtbmv<double2>(queue, nullptr).SetUp();
- Xtpmv<float>(queue, nullptr).SetUp(); Xtpmv<double>(queue, nullptr).SetUp(); Xtpmv<float2>(queue, nullptr).SetUp(); Xtpmv<double2>(queue, nullptr).SetUp();
- Xger<float>(queue, nullptr).SetUp(); Xger<double>(queue, nullptr).SetUp();
- Xgeru<float2>(queue, nullptr).SetUp(); Xgeru<double2>(queue, nullptr).SetUp();
- Xgerc<float2>(queue, nullptr).SetUp(); Xgerc<double2>(queue, nullptr).SetUp();
- Xher<float2,float>(queue, nullptr).SetUp(); Xher<double2,double>(queue, nullptr).SetUp();
- Xhpr<float2,float>(queue, nullptr).SetUp(); Xhpr<double2,double>(queue, nullptr).SetUp();
- Xher2<float2>(queue, nullptr).SetUp(); Xher2<double2>(queue, nullptr).SetUp();
- Xhpr2<float2>(queue, nullptr).SetUp(); Xhpr2<double2>(queue, nullptr).SetUp();
- Xsyr<float>(queue, nullptr).SetUp(); Xsyr<double>(queue, nullptr).SetUp();
- Xspr<float>(queue, nullptr).SetUp(); Xspr<double>(queue, nullptr).SetUp();
- Xsyr2<float>(queue, nullptr).SetUp(); Xsyr2<double>(queue, nullptr).SetUp();
- Xspr2<float>(queue, nullptr).SetUp(); Xspr2<double>(queue, nullptr).SetUp();
+ Xgemv<float>(queue, nullptr); Xgemv<double>(queue, nullptr); Xgemv<float2>(queue, nullptr); Xgemv<double2>(queue, nullptr);
+ Xgbmv<float>(queue, nullptr); Xgbmv<double>(queue, nullptr); Xgbmv<float2>(queue, nullptr); Xgbmv<double2>(queue, nullptr);
+ Xhemv<float2>(queue, nullptr); Xhemv<double2>(queue, nullptr);
+ Xhbmv<float2>(queue, nullptr); Xhbmv<double2>(queue, nullptr);
+ Xhpmv<float2>(queue, nullptr); Xhpmv<double2>(queue, nullptr);
+ Xsymv<float>(queue, nullptr); Xsymv<double>(queue, nullptr);
+ Xsbmv<float>(queue, nullptr); Xsbmv<double>(queue, nullptr);
+ Xspmv<float>(queue, nullptr); Xspmv<double>(queue, nullptr);
+ Xtrmv<float>(queue, nullptr); Xtrmv<double>(queue, nullptr); Xtrmv<float2>(queue, nullptr); Xtrmv<double2>(queue, nullptr);
+ Xtbmv<float>(queue, nullptr); Xtbmv<double>(queue, nullptr); Xtbmv<float2>(queue, nullptr); Xtbmv<double2>(queue, nullptr);
+ Xtpmv<float>(queue, nullptr); Xtpmv<double>(queue, nullptr); Xtpmv<float2>(queue, nullptr); Xtpmv<double2>(queue, nullptr);
+ Xger<float>(queue, nullptr); Xger<double>(queue, nullptr);
+ Xgeru<float2>(queue, nullptr); Xgeru<double2>(queue, nullptr);
+ Xgerc<float2>(queue, nullptr); Xgerc<double2>(queue, nullptr);
+ Xher<float2,float>(queue, nullptr); Xher<double2,double>(queue, nullptr);
+ Xhpr<float2,float>(queue, nullptr); Xhpr<double2,double>(queue, nullptr);
+ Xher2<float2>(queue, nullptr); Xher2<double2>(queue, nullptr);
+ Xhpr2<float2>(queue, nullptr); Xhpr2<double2>(queue, nullptr);
+ Xsyr<float>(queue, nullptr); Xsyr<double>(queue, nullptr);
+ Xspr<float>(queue, nullptr); Xspr<double>(queue, nullptr);
+ Xsyr2<float>(queue, nullptr); Xsyr2<double>(queue, nullptr);
+ Xspr2<float>(queue, nullptr); Xspr2<double>(queue, nullptr);
// Runs all the level 3 set-up functions
- Xgemm<float>(queue, nullptr).SetUp(); Xgemm<double>(queue, nullptr).SetUp(); Xgemm<float2>(queue, nullptr).SetUp(); Xgemm<double2>(queue, nullptr).SetUp();
- Xsymm<float>(queue, nullptr).SetUp(); Xsymm<double>(queue, nullptr).SetUp(); Xsymm<float2>(queue, nullptr).SetUp(); Xsymm<double2>(queue, nullptr).SetUp();
- Xhemm<float2>(queue, nullptr).SetUp(); Xhemm<double2>(queue, nullptr).SetUp();
- Xsyrk<float>(queue, nullptr).SetUp(); Xsyrk<double>(queue, nullptr).SetUp(); Xsyrk<float2>(queue, nullptr).SetUp(); Xsyrk<double2>(queue, nullptr).SetUp();
- Xherk<float2,float>(queue, nullptr).SetUp(); Xherk<double2,double>(queue, nullptr).SetUp();
- Xsyr2k<float>(queue, nullptr).SetUp(); Xsyr2k<double>(queue, nullptr).SetUp(); Xsyr2k<float2>(queue, nullptr).SetUp(); Xsyr2k<double2>(queue, nullptr).SetUp();
- Xher2k<float2,float>(queue, nullptr).SetUp(); Xher2k<double2,double>(queue, nullptr).SetUp();
- Xtrmm<float>(queue, nullptr).SetUp(); Xtrmm<double>(queue, nullptr).SetUp(); Xtrmm<float2>(queue, nullptr).SetUp(); Xtrmm<double2>(queue, nullptr).SetUp();
+ Xgemm<float>(queue, nullptr); Xgemm<double>(queue, nullptr); Xgemm<float2>(queue, nullptr); Xgemm<double2>(queue, nullptr);
+ Xsymm<float>(queue, nullptr); Xsymm<double>(queue, nullptr); Xsymm<float2>(queue, nullptr); Xsymm<double2>(queue, nullptr);
+ Xhemm<float2>(queue, nullptr); Xhemm<double2>(queue, nullptr);
+ Xsyrk<float>(queue, nullptr); Xsyrk<double>(queue, nullptr); Xsyrk<float2>(queue, nullptr); Xsyrk<double2>(queue, nullptr);
+ Xherk<float2,float>(queue, nullptr); Xherk<double2,double>(queue, nullptr);
+ Xsyr2k<float>(queue, nullptr); Xsyr2k<double>(queue, nullptr); Xsyr2k<float2>(queue, nullptr); Xsyr2k<double2>(queue, nullptr);
+ Xher2k<float2,float>(queue, nullptr); Xher2k<double2,double>(queue, nullptr);
+ Xtrmm<float>(queue, nullptr); Xtrmm<double>(queue, nullptr); Xtrmm<float2>(queue, nullptr); Xtrmm<double2>(queue, nullptr);
// Runs all the level 3 set-up functions
- Xomatcopy<float>(queue, nullptr).SetUp(); Xomatcopy<double>(queue, nullptr).SetUp(); Xomatcopy<float2>(queue, nullptr).SetUp(); Xomatcopy<double2>(queue, nullptr).SetUp();
+ Xomatcopy<float>(queue, nullptr); Xomatcopy<double>(queue, nullptr); Xomatcopy<float2>(queue, nullptr); Xomatcopy<double2>(queue, nullptr);
} catch (...) { return DispatchException(); }
return StatusCode::kSuccess;
diff --git a/src/routine.cpp b/src/routine.cpp
index 5e3a9dfe..acafb0d2 100644
--- a/src/routine.cpp
+++ b/src/routine.cpp
@@ -21,10 +21,11 @@
namespace clblast {
// =================================================================================================
-// Constructor: not much here, because no status codes can be returned
+// The constructor does all heavy work, errors are returned as exceptions
Routine::Routine(Queue &queue, EventPointer event, const std::string &name,
const std::vector<std::string> &routines, const Precision precision,
- const std::vector<const Database::DatabaseEntry*> &userDatabase):
+ const std::vector<const Database::DatabaseEntry*> &userDatabase,
+ std::initializer_list<const char *> source):
precision_(precision),
routine_name_(name),
queue_(queue),
@@ -33,12 +34,6 @@ Routine::Routine(Queue &queue, EventPointer event, const std::string &name,
device_(queue_.GetDevice()),
device_name_(device_.Name()),
db_(queue_, routines, precision_, userDatabase) {
-}
-
-// =================================================================================================
-
-// Separate set-up function to allow for status codes to be returned
-void Routine::SetUp() {
// Queries the cache to see whether or not the program (context-specific) is already there
if (ProgramIsInCache(context_, precision_, routine_name_)) { return; }
@@ -77,37 +72,39 @@ void Routine::SetUp() {
}
}
- // Loads the common header (typedefs and defines and such)
- std::string common_header =
- #include "kernels/common.opencl"
- ;
-
// Collects the parameters for this device in the form of defines, and adds the precision
- auto defines = db_.GetDefines();
- defines += "#define PRECISION "+ToString(static_cast<int>(precision_))+"\n";
+ auto source_string = db_.GetDefines();
+ source_string += "#define PRECISION "+ToString(static_cast<int>(precision_))+"\n";
// Adds the name of the routine as a define
- defines += "#define ROUTINE_"+routine_name_+"\n";
+ source_string += "#define ROUTINE_"+routine_name_+"\n";
// For specific devices, use the non-IEEE754 compliant OpenCL mad() instruction. This can improve
// performance, but might result in a reduced accuracy.
if (device_.IsAMD() && device_.IsGPU()) {
- defines += "#define USE_CL_MAD 1\n";
+ source_string += "#define USE_CL_MAD 1\n";
}
// For specific devices, use staggered/shuffled workgroup indices.
if (device_.IsAMD() && device_.IsGPU()) {
- defines += "#define USE_STAGGERED_INDICES 1\n";
+ source_string += "#define USE_STAGGERED_INDICES 1\n";
}
// For specific devices add a global synchronisation barrier to the GEMM kernel to optimize
// performance through better cache behaviour
if (device_.IsARM() && device_.IsGPU()) {
- defines += "#define GLOBAL_MEM_FENCE 1\n";
+ source_string += "#define GLOBAL_MEM_FENCE 1\n";
}
- // Combines everything together into a single source string
- const auto source_string = defines + common_header + source_string_;
+ // Loads the common header (typedefs and defines and such)
+ source_string +=
+ #include "kernels/common.opencl"
+ ;
+
+ // Adds routine-specific code to the constructed source string
+ for (const char *s: source) {
+ source_string += s;
+ }
// Prints details of the routine to compile in case of debugging in verbose mode
#ifdef VERBOSE
diff --git a/src/routine.hpp b/src/routine.hpp
index c2bcdaff..f4ad435e 100644
--- a/src/routine.hpp
+++ b/src/routine.hpp
@@ -34,21 +34,19 @@ class Routine {
// Base class constructor. The user database is an optional extra database to override the
// built-in database.
+ // All heavy preparation work is done inside this constructor.
explicit Routine(Queue &queue, EventPointer event, const std::string &name,
const std::vector<std::string> &routines, const Precision precision,
- const std::vector<const Database::DatabaseEntry*> &userDatabase = {});
-
- // Set-up phase of the kernel
- void SetUp();
+ const std::vector<const Database::DatabaseEntry*> &userDatabase,
+ std::initializer_list<const char *> source);
protected:
// Non-static variable for the precision
const Precision precision_;
- // The routine's name and its kernel-source in string form
+ // The routine's name
const std::string routine_name_;
- std::string source_string_;
// The OpenCL objects, accessible only from derived classes
Queue queue_;
diff --git a/src/routines/level1/xamax.cpp b/src/routines/level1/xamax.cpp
index 8307188b..e9efa1a7 100644
--- a/src/routines/level1/xamax.cpp
+++ b/src/routines/level1/xamax.cpp
@@ -22,10 +22,9 @@ namespace clblast {
// Constructor: forwards to base class constructor
template <typename T>
Xamax<T>::Xamax(Queue &queue, EventPointer event, const std::string &name):
- Routine(queue, event, name, {"Xdot"}, PrecisionValue<T>()) {
- source_string_ =
+ Routine(queue, event, name, {"Xdot"}, PrecisionValue<T>(), {}, {
#include "../../kernels/level1/xamax.opencl"
- ;
+ }) {
}
// =================================================================================================
diff --git a/src/routines/level1/xasum.cpp b/src/routines/level1/xasum.cpp
index 9dde7a87..a242a5fa 100644
--- a/src/routines/level1/xasum.cpp
+++ b/src/routines/level1/xasum.cpp
@@ -22,10 +22,9 @@ namespace clblast {
// Constructor: forwards to base class constructor
template <typename T>
Xasum<T>::Xasum(Queue &queue, EventPointer event, const std::string &name):
- Routine(queue, event, name, {"Xdot"}, PrecisionValue<T>()) {
- source_string_ =
+ Routine(queue, event, name, {"Xdot"}, PrecisionValue<T>(), {}, {
#include "../../kernels/level1/xasum.opencl"
- ;
+ }) {
}
// =================================================================================================
diff --git a/src/routines/level1/xaxpy.cpp b/src/routines/level1/xaxpy.cpp
index cbcbb3cd..5436c5b7 100644
--- a/src/routines/level1/xaxpy.cpp
+++ b/src/routines/level1/xaxpy.cpp
@@ -22,11 +22,10 @@ namespace clblast {
// Constructor: forwards to base class constructor
template <typename T>
Xaxpy<T>::Xaxpy(Queue &queue, EventPointer event, const std::string &name):
- Routine(queue, event, name, {"Xaxpy"}, PrecisionValue<T>()) {
- source_string_ =
+ Routine(queue, event, name, {"Xaxpy"}, PrecisionValue<T>(), {}, {
#include "../../kernels/level1/level1.opencl"
#include "../../kernels/level1/xaxpy.opencl"
- ;
+ }) {
}
// =================================================================================================
diff --git a/src/routines/level1/xcopy.cpp b/src/routines/level1/xcopy.cpp
index 3bfbada6..d86200c0 100644
--- a/src/routines/level1/xcopy.cpp
+++ b/src/routines/level1/xcopy.cpp
@@ -22,11 +22,10 @@ namespace clblast {
// Constructor: forwards to base class constructor
template <typename T>
Xcopy<T>::Xcopy(Queue &queue, EventPointer event, const std::string &name):
- Routine(queue, event, name, {"Xaxpy"}, PrecisionValue<T>()) {
- source_string_ =
+ Routine(queue, event, name, {"Xaxpy"}, PrecisionValue<T>(), {}, {
#include "../../kernels/level1/level1.opencl"
#include "../../kernels/level1/xcopy.opencl"
- ;
+ }) {
}
// =================================================================================================
diff --git a/src/routines/level1/xdot.cpp b/src/routines/level1/xdot.cpp
index e0d297f8..9d718913 100644
--- a/src/routines/level1/xdot.cpp
+++ b/src/routines/level1/xdot.cpp
@@ -22,10 +22,9 @@ namespace clblast {
// Constructor: forwards to base class constructor
template <typename T>
Xdot<T>::Xdot(Queue &queue, EventPointer event, const std::string &name):
- Routine(queue, event, name, {"Xdot"}, PrecisionValue<T>()) {
- source_string_ =
+ Routine(queue, event, name, {"Xdot"}, PrecisionValue<T>(), {}, {
#include "../../kernels/level1/xdot.opencl"
- ;
+ }) {
}
// =================================================================================================
diff --git a/src/routines/level1/xnrm2.cpp b/src/routines/level1/xnrm2.cpp
index eb795498..373820a4 100644
--- a/src/routines/level1/xnrm2.cpp
+++ b/src/routines/level1/xnrm2.cpp
@@ -22,10 +22,9 @@ namespace clblast {
// Constructor: forwards to base class constructor
template <typename T>
Xnrm2<T>::Xnrm2(Queue &queue, EventPointer event, const std::string &name):
- Routine(queue, event, name, {"Xdot"}, PrecisionValue<T>()) {
- source_string_ =
+ Routine(queue, event, name, {"Xdot"}, PrecisionValue<T>(), {}, {
#include "../../kernels/level1/xnrm2.opencl"
- ;
+ }) {
}
// =================================================================================================
diff --git a/src/routines/level1/xscal.cpp b/src/routines/level1/xscal.cpp
index ed126879..17410f01 100644
--- a/src/routines/level1/xscal.cpp
+++ b/src/routines/level1/xscal.cpp
@@ -22,11 +22,10 @@ namespace clblast {
// Constructor: forwards to base class constructor
template <typename T>
Xscal<T>::Xscal(Queue &queue, EventPointer event, const std::string &name):
- Routine(queue, event, name, {"Xaxpy"}, PrecisionValue<T>()) {
- source_string_ =
+ Routine(queue, event, name, {"Xaxpy"}, PrecisionValue<T>(), {}, {
#include "../../kernels/level1/level1.opencl"
#include "../../kernels/level1/xscal.opencl"
- ;
+ }) {
}
// =================================================================================================
diff --git a/src/routines/level1/xswap.cpp b/src/routines/level1/xswap.cpp
index 2f2c0370..c9b97dc9 100644
--- a/src/routines/level1/xswap.cpp
+++ b/src/routines/level1/xswap.cpp
@@ -22,11 +22,10 @@ namespace clblast {
// Constructor: forwards to base class constructor
template <typename T>
Xswap<T>::Xswap(Queue &queue, EventPointer event, const std::string &name):
- Routine(queue, event, name, {"Xaxpy"}, PrecisionValue<T>()) {
- source_string_ =
+ Routine(queue, event, name, {"Xaxpy"}, PrecisionValue<T>(), {}, {
#include "../../kernels/level1/level1.opencl"
#include "../../kernels/level1/xswap.opencl"
- ;
+ }) {
}
// =================================================================================================
diff --git a/src/routines/level2/xgemv.cpp b/src/routines/level2/xgemv.cpp
index 97dcd8ef..7b4c2e8f 100644
--- a/src/routines/level2/xgemv.cpp
+++ b/src/routines/level2/xgemv.cpp
@@ -22,11 +22,10 @@ namespace clblast {
// Constructor: forwards to base class constructor
template <typename T>
Xgemv<T>::Xgemv(Queue &queue, EventPointer event, const std::string &name):
- Routine(queue, event, name, {"Pad", "Xgemv", "XgemvFast", "XgemvFastRot"}, PrecisionValue<T>()) {
- source_string_ =
+ Routine(queue, event, name, {"Pad", "Xgemv", "XgemvFast", "XgemvFastRot"}, PrecisionValue<T>(), {}, {
#include "../../kernels/level2/xgemv.opencl"
#include "../../kernels/level2/xgemv_fast.opencl"
- ;
+ }) {
}
// =================================================================================================
diff --git a/src/routines/level2/xger.cpp b/src/routines/level2/xger.cpp
index f22131bb..d16ebd11 100644
--- a/src/routines/level2/xger.cpp
+++ b/src/routines/level2/xger.cpp
@@ -22,11 +22,10 @@ namespace clblast {
// Constructor: forwards to base class constructor
template <typename T>
Xger<T>::Xger(Queue &queue, EventPointer event, const std::string &name):
- Routine(queue, event, name, {"Xger"}, PrecisionValue<T>()) {
- source_string_ =
+ Routine(queue, event, name, {"Xger"}, PrecisionValue<T>(), {}, {
#include "../../kernels/level2/level2.opencl"
#include "../../kernels/level2/xger.opencl"
- ;
+ }) {
}
// =================================================================================================
diff --git a/src/routines/level2/xher.cpp b/src/routines/level2/xher.cpp
index 4cf27702..6c334e63 100644
--- a/src/routines/level2/xher.cpp
+++ b/src/routines/level2/xher.cpp
@@ -21,11 +21,10 @@ namespace clblast {
// Constructor: forwards to base class constructor
template <typename T, typename U>
Xher<T,U>::Xher(Queue &queue, EventPointer event, const std::string &name):
- Routine(queue, event, name, {"Xger"}, PrecisionValue<T>()) {
- source_string_ =
+ Routine(queue, event, name, {"Xger"}, PrecisionValue<T>(), {}, {
#include "../../kernels/level2/level2.opencl"
#include "../../kernels/level2/xher.opencl"
- ;
+ }) {
}
// =================================================================================================
diff --git a/src/routines/level2/xher2.cpp b/src/routines/level2/xher2.cpp
index c93585de..11e2c871 100644
--- a/src/routines/level2/xher2.cpp
+++ b/src/routines/level2/xher2.cpp
@@ -21,11 +21,10 @@ namespace clblast {
// Constructor: forwards to base class constructor
template <typename T>
Xher2<T>::Xher2(Queue &queue, EventPointer event, const std::string &name):
- Routine(queue, event, name, {"Xger"}, PrecisionValue<T>()) {
- source_string_ =
+ Routine(queue, event, name, {"Xger"}, PrecisionValue<T>(), {}, {
#include "../../kernels/level2/level2.opencl"
#include "../../kernels/level2/xher2.opencl"
- ;
+ }) {
}
// =================================================================================================
diff --git a/src/routines/level3/xgemm.cpp b/src/routines/level3/xgemm.cpp
index a6f7c286..4f70dc7a 100644
--- a/src/routines/level3/xgemm.cpp
+++ b/src/routines/level3/xgemm.cpp
@@ -24,8 +24,7 @@ template <typename T>
Xgemm<T>::Xgemm(Queue &queue, EventPointer event, const std::string &name):
Routine(queue, event, name,
{"Copy","Pad","Transpose","Padtranspose","Xgemm","XgemmDirect","KernelSelection"},
- PrecisionValue<T>()) {
- source_string_ =
+ PrecisionValue<T>(), {}, {
#include "../../kernels/level3/level3.opencl"
#include "../../kernels/level3/copy_fast.opencl"
#include "../../kernels/level3/copy_pad.opencl"
@@ -37,13 +36,11 @@ Xgemm<T>::Xgemm(Queue &queue, EventPointer event, const std::string &name):
#include "../../kernels/level3/xgemm_direct_part1.opencl"
#include "../../kernels/level3/xgemm_direct_part2.opencl"
#include "../../kernels/level3/xgemm_direct_part3.opencl"
- ;
- auto source_string_part_2 = // separated in two parts to prevent C1091 in MSVC 2013
+ , // separated in two parts to prevent C1091 in MSVC 2013
#include "../../kernels/level3/xgemm_part1.opencl"
#include "../../kernels/level3/xgemm_part2.opencl"
#include "../../kernels/level3/xgemm_part3.opencl"
- ;
- source_string_ += source_string_part_2;
+ }) {
}
// =================================================================================================
diff --git a/src/routines/level3/xher2k.cpp b/src/routines/level3/xher2k.cpp
index a326dfbe..7244c848 100644
--- a/src/routines/level3/xher2k.cpp
+++ b/src/routines/level3/xher2k.cpp
@@ -22,8 +22,7 @@ namespace clblast {
// Constructor: forwards to base class constructor
template <typename T, typename U>
Xher2k<T,U>::Xher2k(Queue &queue, EventPointer event, const std::string &name):
- Routine(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, PrecisionValue<T>()) {
- source_string_ =
+ Routine(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, PrecisionValue<T>(), {}, {
#include "../../kernels/level3/level3.opencl"
#include "../../kernels/level3/copy_fast.opencl"
#include "../../kernels/level3/copy_pad.opencl"
@@ -32,7 +31,7 @@ Xher2k<T,U>::Xher2k(Queue &queue, EventPointer event, const std::string &name):
#include "../../kernels/level3/xgemm_part1.opencl"
#include "../../kernels/level3/xgemm_part2.opencl"
#include "../../kernels/level3/xgemm_part3.opencl"
- ;
+ }) {
}
// =================================================================================================
diff --git a/src/routines/level3/xherk.cpp b/src/routines/level3/xherk.cpp
index 6e36714e..865c6c37 100644
--- a/src/routines/level3/xherk.cpp
+++ b/src/routines/level3/xherk.cpp
@@ -22,8 +22,7 @@ namespace clblast {
// Constructor: forwards to base class constructor
template <typename T, typename U>
Xherk<T,U>::Xherk(Queue &queue, EventPointer event, const std::string &name):
- Routine(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, PrecisionValue<T>()) {
- source_string_ =
+ Routine(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, PrecisionValue<T>(), {}, {
#include "../../kernels/level3/level3.opencl"
#include "../../kernels/level3/copy_fast.opencl"
#include "../../kernels/level3/copy_pad.opencl"
@@ -32,7 +31,7 @@ Xherk<T,U>::Xherk(Queue &queue, EventPointer event, const std::string &name):
#include "../../kernels/level3/xgemm_part1.opencl"
#include "../../kernels/level3/xgemm_part2.opencl"
#include "../../kernels/level3/xgemm_part3.opencl"
- ;
+ }) {
}
// =================================================================================================
diff --git a/src/routines/level3/xsyr2k.cpp b/src/routines/level3/xsyr2k.cpp
index b10ee586..826854a8 100644
--- a/src/routines/level3/xsyr2k.cpp
+++ b/src/routines/level3/xsyr2k.cpp
@@ -22,8 +22,7 @@ namespace clblast {
// Constructor: forwards to base class constructor
template <typename T>
Xsyr2k<T>::Xsyr2k(Queue &queue, EventPointer event, const std::string &name):
- Routine(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, PrecisionValue<T>()) {
- source_string_ =
+ Routine(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, PrecisionValue<T>(), {}, {
#include "../../kernels/level3/level3.opencl"
#include "../../kernels/level3/copy_fast.opencl"
#include "../../kernels/level3/copy_pad.opencl"
@@ -32,7 +31,7 @@ Xsyr2k<T>::Xsyr2k(Queue &queue, EventPointer event, const std::string &name):
#include "../../kernels/level3/xgemm_part1.opencl"
#include "../../kernels/level3/xgemm_part2.opencl"
#include "../../kernels/level3/xgemm_part3.opencl"
- ;
+ }) {
}
// =================================================================================================
diff --git a/src/routines/level3/xsyrk.cpp b/src/routines/level3/xsyrk.cpp
index 93fd4666..9aa8ca2d 100644
--- a/src/routines/level3/xsyrk.cpp
+++ b/src/routines/level3/xsyrk.cpp
@@ -22,8 +22,7 @@ namespace clblast {
// Constructor: forwards to base class constructor
template <typename T>
Xsyrk<T>::Xsyrk(Queue &queue, EventPointer event, const std::string &name):
- Routine(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, PrecisionValue<T>()) {
- source_string_ =
+ Routine(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, PrecisionValue<T>(), {}, {
#include "../../kernels/level3/level3.opencl"
#include "../../kernels/level3/copy_fast.opencl"
#include "../../kernels/level3/copy_pad.opencl"
@@ -32,7 +31,7 @@ Xsyrk<T>::Xsyrk(Queue &queue, EventPointer event, const std::string &name):
#include "../../kernels/level3/xgemm_part1.opencl"
#include "../../kernels/level3/xgemm_part2.opencl"
#include "../../kernels/level3/xgemm_part3.opencl"
- ;
+ }) {
}
// =================================================================================================
diff --git a/src/routines/levelx/xomatcopy.cpp b/src/routines/levelx/xomatcopy.cpp
index e053c076..875ca7d2 100644
--- a/src/routines/levelx/xomatcopy.cpp
+++ b/src/routines/levelx/xomatcopy.cpp
@@ -22,14 +22,13 @@ namespace clblast {
// Constructor: forwards to base class constructor
template <typename T>
Xomatcopy<T>::Xomatcopy(Queue &queue, EventPointer event, const std::string &name):
- Routine(queue, event, name, {"Copy","Pad","Transpose","Padtranspose"}, PrecisionValue<T>()) {
- source_string_ =
+ Routine(queue, event, name, {"Copy","Pad","Transpose","Padtranspose"}, PrecisionValue<T>(), {}, {
#include "../../kernels/level3/level3.opencl"
#include "../../kernels/level3/copy_fast.opencl"
#include "../../kernels/level3/copy_pad.opencl"
#include "../../kernels/level3/transpose_fast.opencl"
#include "../../kernels/level3/transpose_pad.opencl"
- ;
+ }) {
}
// =================================================================================================