summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Cedric Nugteren <web@cedricnugteren.nl> 2015-10-17 15:30:06 +0200
committer Cedric Nugteren <web@cedricnugteren.nl> 2015-10-17 15:30:06 +0200
commit 653feca5644592003345f5832bb81138cc07d783 (patch)
tree 61ebab5da9b8807ad1d085e2a249df39b861db0e
parent 92b4b0d1feaaf92e160fa0342daf4269f24fb4d2 (diff)
parent 0d4091fdfbb6ed29c5b6f73b4d272fdaa01c3ba8 (diff)
Merge pull request #28 from CNugteren/kernels_reorganization
Kernels re-organization level-3
-rw-r--r-- CHANGELOG 1
-rw-r--r-- include/internal/routines/level1/xaxpy.h 2
-rw-r--r-- include/internal/routines/level1/xcopy.h 2
-rw-r--r-- include/internal/routines/level1/xscal.h 2
-rw-r--r-- include/internal/routines/level1/xswap.h 2
-rw-r--r-- include/internal/routines/level3/xgemm.h 2
-rw-r--r-- include/internal/routines/level3/xhemm.h 2
-rw-r--r-- include/internal/routines/level3/xher2k.h 2
-rw-r--r-- include/internal/routines/level3/xherk.h 2
-rw-r--r-- include/internal/routines/level3/xsymm.h 2
-rw-r--r-- include/internal/routines/level3/xsyr2k.h 2
-rw-r--r-- include/internal/routines/level3/xsyrk.h 2
-rw-r--r-- include/internal/routines/level3/xtrmm.h 2
-rw-r--r-- src/kernels/level3/copy.opencl (renamed from src/kernels/copy.opencl) 0
-rw-r--r-- src/kernels/level3/pad.opencl (renamed from src/kernels/pad.opencl) 6
-rw-r--r-- src/kernels/level3/padtranspose.opencl (renamed from src/kernels/padtranspose.opencl) 0
-rw-r--r-- src/kernels/level3/transpose.opencl (renamed from src/kernels/transpose.opencl) 0
-rw-r--r-- src/kernels/level3/xgemm.opencl (renamed from src/kernels/xgemm.opencl) 0
-rw-r--r-- src/routines/level1/xaxpy.cc 4
-rw-r--r-- src/routines/level1/xcopy.cc 4
-rw-r--r-- src/routines/level1/xscal.cc 4
-rw-r--r-- src/routines/level1/xswap.cc 4
-rw-r--r-- src/routines/level3/xgemm.cc 14
-rw-r--r-- src/routines/level3/xhemm.cc 4
-rw-r--r-- src/routines/level3/xher2k.cc 14
-rw-r--r-- src/routines/level3/xherk.cc 14
-rw-r--r-- src/routines/level3/xsymm.cc 4
-rw-r--r-- src/routines/level3/xsyr2k.cc 14
-rw-r--r-- src/routines/level3/xsyrk.cc 14
-rw-r--r-- src/routines/level3/xtrmm.cc 4
-rw-r--r-- src/tuning/copy.cc 2
-rw-r--r-- src/tuning/pad.cc 2
-rw-r--r-- src/tuning/padtranspose.cc 2
-rw-r--r-- src/tuning/transpose.cc 2
-rw-r--r-- src/tuning/xgemm.cc 2
35 files changed, 72 insertions, 67 deletions
diff --git a/CHANGELOG b/CHANGELOG
index c1981d7a..91e9c6aa 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,6 +1,7 @@
Development version (next release)
- Improved structure and performance of level-2 routines (xSYMV/xHEMV)
+- Reduced compilation time of level-3 OpenCL kernels
- Added level-1 routines:
* SSWAP/DSWAP/CSWAP/ZSWAP
* SSCAL/DSCAL/CSCAL/ZSCAL
diff --git a/include/internal/routines/level1/xaxpy.h b/include/internal/routines/level1/xaxpy.h
index 4b9da890..689cf169 100644
--- a/include/internal/routines/level1/xaxpy.h
+++ b/include/internal/routines/level1/xaxpy.h
@@ -35,7 +35,7 @@ class Xaxpy: public Routine<T> {
using Routine<T>::ErrorIn;
// Constructor
- Xaxpy(Queue &queue, Event &event);
+ Xaxpy(Queue &queue, Event &event, const std::string &name = "AXPY");
// Templated-precision implementation of the routine
StatusCode DoAxpy(const size_t n, const T alpha,
diff --git a/include/internal/routines/level1/xcopy.h b/include/internal/routines/level1/xcopy.h
index c71583c5..15f339aa 100644
--- a/include/internal/routines/level1/xcopy.h
+++ b/include/internal/routines/level1/xcopy.h
@@ -35,7 +35,7 @@ class Xcopy: public Routine<T> {
using Routine<T>::ErrorIn;
// Constructor
- Xcopy(Queue &queue, Event &event);
+ Xcopy(Queue &queue, Event &event, const std::string &name = "COPY");
// Templated-precision implementation of the routine
StatusCode DoCopy(const size_t n,
diff --git a/include/internal/routines/level1/xscal.h b/include/internal/routines/level1/xscal.h
index 0aa6059d..d97b5a07 100644
--- a/include/internal/routines/level1/xscal.h
+++ b/include/internal/routines/level1/xscal.h
@@ -34,7 +34,7 @@ class Xscal: public Routine<T> {
using Routine<T>::ErrorIn;
// Constructor
- Xscal(Queue &queue, Event &event);
+ Xscal(Queue &queue, Event &event, const std::string &name = "SCAL");
// Templated-precision implementation of the routine
StatusCode DoScal(const size_t n, const T alpha,
diff --git a/include/internal/routines/level1/xswap.h b/include/internal/routines/level1/xswap.h
index 3dabc62c..fe79882b 100644
--- a/include/internal/routines/level1/xswap.h
+++ b/include/internal/routines/level1/xswap.h
@@ -35,7 +35,7 @@ class Xswap: public Routine<T> {
using Routine<T>::ErrorIn;
// Constructor
- Xswap(Queue &queue, Event &event);
+ Xswap(Queue &queue, Event &event, const std::string &name = "SWAP");
// Templated-precision implementation of the routine
StatusCode DoSwap(const size_t n,
diff --git a/include/internal/routines/level3/xgemm.h b/include/internal/routines/level3/xgemm.h
index a0c8b595..9b40a7fc 100644
--- a/include/internal/routines/level3/xgemm.h
+++ b/include/internal/routines/level3/xgemm.h
@@ -38,7 +38,7 @@ class Xgemm: public Routine<T> {
using Routine<T>::ErrorIn;
// Constructor
- Xgemm(Queue &queue, Event &event);
+ Xgemm(Queue &queue, Event &event, const std::string &name = "GEMM");
// Templated-precision implementation of the routine
StatusCode DoGemm(const Layout layout, const Transpose a_transpose, const Transpose b_transpose,
diff --git a/include/internal/routines/level3/xhemm.h b/include/internal/routines/level3/xhemm.h
index 5f1e8723..ca38ca08 100644
--- a/include/internal/routines/level3/xhemm.h
+++ b/include/internal/routines/level3/xhemm.h
@@ -37,7 +37,7 @@ class Xhemm: public Xgemm<T> {
using Xgemm<T>::DoGemm;
// Constructor
- Xhemm(Queue &queue, Event &event);
+ Xhemm(Queue &queue, Event &event, const std::string &name = "HEMM");
// Templated-precision implementation of the routine
StatusCode DoHemm(const Layout layout, const Side side, const Triangle triangle,
diff --git a/include/internal/routines/level3/xher2k.h b/include/internal/routines/level3/xher2k.h
index 9e961d23..7113a172 100644
--- a/include/internal/routines/level3/xher2k.h
+++ b/include/internal/routines/level3/xher2k.h
@@ -40,7 +40,7 @@ class Xher2k: public Routine<T> {
using Routine<T>::ErrorIn;
// Constructor
- Xher2k(Queue &queue, Event &event);
+ Xher2k(Queue &queue, Event &event, const std::string &name = "HER2K");
// Templated-precision implementation of the routine
StatusCode DoHer2k(const Layout layout, const Triangle triangle, const Transpose ab_transpose,
diff --git a/include/internal/routines/level3/xherk.h b/include/internal/routines/level3/xherk.h
index f285a71c..47112c2c 100644
--- a/include/internal/routines/level3/xherk.h
+++ b/include/internal/routines/level3/xherk.h
@@ -39,7 +39,7 @@ class Xherk: public Routine<T> {
using Routine<T>::ErrorIn;
// Constructor
- Xherk(Queue &queue, Event &event);
+ Xherk(Queue &queue, Event &event, const std::string &name = "HERK");
// Templated-precision implementation of the routine
StatusCode DoHerk(const Layout layout, const Triangle triangle, const Transpose a_transpose,
diff --git a/include/internal/routines/level3/xsymm.h b/include/internal/routines/level3/xsymm.h
index 9ed3c722..9fc80eb4 100644
--- a/include/internal/routines/level3/xsymm.h
+++ b/include/internal/routines/level3/xsymm.h
@@ -39,7 +39,7 @@ class Xsymm: public Xgemm<T> {
using Xgemm<T>::DoGemm;
// Constructor
- Xsymm(Queue &queue, Event &event);
+ Xsymm(Queue &queue, Event &event, const std::string &name = "SYMM");
// Templated-precision implementation of the routine
StatusCode DoSymm(const Layout layout, const Side side, const Triangle triangle,
diff --git a/include/internal/routines/level3/xsyr2k.h b/include/internal/routines/level3/xsyr2k.h
index 85936658..c4679028 100644
--- a/include/internal/routines/level3/xsyr2k.h
+++ b/include/internal/routines/level3/xsyr2k.h
@@ -40,7 +40,7 @@ class Xsyr2k: public Routine<T> {
using Routine<T>::ErrorIn;
// Constructor
- Xsyr2k(Queue &queue, Event &event);
+ Xsyr2k(Queue &queue, Event &event, const std::string &name = "SYR2K");
// Templated-precision implementation of the routine
StatusCode DoSyr2k(const Layout layout, const Triangle triangle, const Transpose ab_transpose,
diff --git a/include/internal/routines/level3/xsyrk.h b/include/internal/routines/level3/xsyrk.h
index 14d51a58..abf6b681 100644
--- a/include/internal/routines/level3/xsyrk.h
+++ b/include/internal/routines/level3/xsyrk.h
@@ -41,7 +41,7 @@ class Xsyrk: public Routine<T> {
using Routine<T>::ErrorIn;
// Constructor
- Xsyrk(Queue &queue, Event &event);
+ Xsyrk(Queue &queue, Event &event, const std::string &name = "SYRK");
// Templated-precision implementation of the routine
StatusCode DoSyrk(const Layout layout, const Triangle triangle, const Transpose a_transpose,
diff --git a/include/internal/routines/level3/xtrmm.h b/include/internal/routines/level3/xtrmm.h
index d8ac60fd..a1f4d15c 100644
--- a/include/internal/routines/level3/xtrmm.h
+++ b/include/internal/routines/level3/xtrmm.h
@@ -38,7 +38,7 @@ class Xtrmm: public Xgemm<T> {
using Xgemm<T>::DoGemm;
// Constructor
- Xtrmm(Queue &queue, Event &event);
+ Xtrmm(Queue &queue, Event &event, const std::string &name = "TRMM");
// Templated-precision implementation of the routine
StatusCode DoTrmm(const Layout layout, const Side side, const Triangle triangle,
diff --git a/src/kernels/copy.opencl b/src/kernels/level3/copy.opencl
index 7dde688b..7dde688b 100644
--- a/src/kernels/copy.opencl
+++ b/src/kernels/level3/copy.opencl
diff --git a/src/kernels/pad.opencl b/src/kernels/level3/pad.opencl
index 69324f20..eefddce4 100644
--- a/src/kernels/pad.opencl
+++ b/src/kernels/level3/pad.opencl
@@ -117,6 +117,7 @@ __kernel void UnPadMatrix(const int src_one, const int src_two,
}
// =================================================================================================
+#if defined(ROUTINE_SYMM)
// Kernel to populate a squared symmetric matrix, given that the triangle which holds the data is
// stored as the lower-triangle of the input matrix. This uses the padding kernel's parameters.
@@ -185,8 +186,9 @@ __kernel void SymmUpperToSquared(const int src_dim,
}
}
+#endif
// =================================================================================================
-#if PRECISION == 3232 || PRECISION == 6464
+#if defined(ROUTINE_HEMM) && (PRECISION == 3232 || PRECISION == 6464)
// Kernel to populate a squared hermitian matrix, given that the triangle which holds the data is
// stored as the lower-triangle of the input matrix. This uses the padding kernel's parameters.
@@ -269,6 +271,7 @@ __kernel void HermUpperToSquared(const int src_dim,
#endif
// =================================================================================================
+#if defined(ROUTINE_TRMM)
// Kernel to populate a squared triangular matrix, given that the triangle which holds the data is
// stored as the lower-triangle of the input matrix. This uses the padding kernel's parameters.
@@ -341,6 +344,7 @@ __kernel void TrmmUpperToSquared(const int src_dim,
}
}
+#endif
// =================================================================================================
// End of the C++11 raw string literal
diff --git a/src/kernels/padtranspose.opencl b/src/kernels/level3/padtranspose.opencl
index a6b70f0b..a6b70f0b 100644
--- a/src/kernels/padtranspose.opencl
+++ b/src/kernels/level3/padtranspose.opencl
diff --git a/src/kernels/transpose.opencl b/src/kernels/level3/transpose.opencl
index d726f7ec..d726f7ec 100644
--- a/src/kernels/transpose.opencl
+++ b/src/kernels/level3/transpose.opencl
diff --git a/src/kernels/xgemm.opencl b/src/kernels/level3/xgemm.opencl
index 8db0f557..8db0f557 100644
--- a/src/kernels/xgemm.opencl
+++ b/src/kernels/level3/xgemm.opencl
diff --git a/src/routines/level1/xaxpy.cc b/src/routines/level1/xaxpy.cc
index ce138fa6..f37a0724 100644
--- a/src/routines/level1/xaxpy.cc
+++ b/src/routines/level1/xaxpy.cc
@@ -29,8 +29,8 @@ template <> const Precision Xaxpy<double2>::precision_ = Precision::kComplexDoub
// Constructor: forwards to base class constructor
template <typename T>
-Xaxpy<T>::Xaxpy(Queue &queue, Event &event):
- Routine<T>(queue, event, "AXPY", {"Xaxpy"}, precision_) {
+Xaxpy<T>::Xaxpy(Queue &queue, Event &event, const std::string &name):
+ Routine<T>(queue, event, name, {"Xaxpy"}, precision_) {
source_string_ =
#include "../../kernels/level1/level1.opencl"
#include "../../kernels/level1/xaxpy.opencl"
diff --git a/src/routines/level1/xcopy.cc b/src/routines/level1/xcopy.cc
index 52e029b9..2b00d43f 100644
--- a/src/routines/level1/xcopy.cc
+++ b/src/routines/level1/xcopy.cc
@@ -29,8 +29,8 @@ template <> const Precision Xcopy<double2>::precision_ = Precision::kComplexDoub
// Constructor: forwards to base class constructor
template <typename T>
-Xcopy<T>::Xcopy(Queue &queue, Event &event):
- Routine<T>(queue, event, "COPY", {"Xaxpy"}, precision_) {
+Xcopy<T>::Xcopy(Queue &queue, Event &event, const std::string &name):
+ Routine<T>(queue, event, name, {"Xaxpy"}, precision_) {
source_string_ =
#include "../../kernels/level1/level1.opencl"
#include "../../kernels/level1/xcopy.opencl"
diff --git a/src/routines/level1/xscal.cc b/src/routines/level1/xscal.cc
index 13e1080c..3fc36b3d 100644
--- a/src/routines/level1/xscal.cc
+++ b/src/routines/level1/xscal.cc
@@ -29,8 +29,8 @@ template <> const Precision Xscal<double2>::precision_ = Precision::kComplexDoub
// Constructor: forwards to base class constructor
template <typename T>
-Xscal<T>::Xscal(Queue &queue, Event &event):
- Routine<T>(queue, event, "SCAL", {"Xaxpy"}, precision_) {
+Xscal<T>::Xscal(Queue &queue, Event &event, const std::string &name):
+ Routine<T>(queue, event, name, {"Xaxpy"}, precision_) {
source_string_ =
#include "../../kernels/level1/level1.opencl"
#include "../../kernels/level1/xscal.opencl"
diff --git a/src/routines/level1/xswap.cc b/src/routines/level1/xswap.cc
index b22b3bdb..123977d3 100644
--- a/src/routines/level1/xswap.cc
+++ b/src/routines/level1/xswap.cc
@@ -29,8 +29,8 @@ template <> const Precision Xswap<double2>::precision_ = Precision::kComplexDoub
// Constructor: forwards to base class constructor
template <typename T>
-Xswap<T>::Xswap(Queue &queue, Event &event):
- Routine<T>(queue, event, "SWAP", {"Xaxpy"}, precision_) {
+Xswap<T>::Xswap(Queue &queue, Event &event, const std::string &name):
+ Routine<T>(queue, event, name, {"Xaxpy"}, precision_) {
source_string_ =
#include "../../kernels/level1/level1.opencl"
#include "../../kernels/level1/xswap.opencl"
diff --git a/src/routines/level3/xgemm.cc b/src/routines/level3/xgemm.cc
index 525a82e6..94aadcad 100644
--- a/src/routines/level3/xgemm.cc
+++ b/src/routines/level3/xgemm.cc
@@ -29,14 +29,14 @@ template <> const Precision Xgemm<double2>::precision_ = Precision::kComplexDoub
// Constructor: forwards to base class constructor
template <typename T>
-Xgemm<T>::Xgemm(Queue &queue, Event &event):
- Routine<T>(queue, event, "GEMM", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) {
+Xgemm<T>::Xgemm(Queue &queue, Event &event, const std::string &name):
+ Routine<T>(queue, event, name, {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) {
source_string_ =
- #include "../../kernels/copy.opencl"
- #include "../../kernels/pad.opencl"
- #include "../../kernels/transpose.opencl"
- #include "../../kernels/padtranspose.opencl"
- #include "../../kernels/xgemm.opencl"
+ #include "../../kernels/level3/copy.opencl"
+ #include "../../kernels/level3/pad.opencl"
+ #include "../../kernels/level3/transpose.opencl"
+ #include "../../kernels/level3/padtranspose.opencl"
+ #include "../../kernels/level3/xgemm.opencl"
;
}
diff --git a/src/routines/level3/xhemm.cc b/src/routines/level3/xhemm.cc
index a1c0c7c1..bcc60dee 100644
--- a/src/routines/level3/xhemm.cc
+++ b/src/routines/level3/xhemm.cc
@@ -21,8 +21,8 @@ namespace clblast {
// Constructor: forwards to base class constructor
template <typename T>
-Xhemm<T>::Xhemm(Queue &queue, Event &event):
- Xgemm<T>(queue, event) {
+Xhemm<T>::Xhemm(Queue &queue, Event &event, const std::string &name):
+ Xgemm<T>(queue, event, name) {
}
// =================================================================================================
diff --git a/src/routines/level3/xher2k.cc b/src/routines/level3/xher2k.cc
index 29b2f733..5b84decb 100644
--- a/src/routines/level3/xher2k.cc
+++ b/src/routines/level3/xher2k.cc
@@ -27,14 +27,14 @@ template <> const Precision Xher2k<double2,double>::precision_ = Precision::kCom
// Constructor: forwards to base class constructor
template <typename T, typename U>
-Xher2k<T,U>::Xher2k(Queue &queue, Event &event):
- Routine<T>(queue, event, "HER2K", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) {
+Xher2k<T,U>::Xher2k(Queue &queue, Event &event, const std::string &name):
+ Routine<T>(queue, event, name, {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) {
source_string_ =
- #include "../../kernels/copy.opencl"
- #include "../../kernels/pad.opencl"
- #include "../../kernels/transpose.opencl"
- #include "../../kernels/padtranspose.opencl"
- #include "../../kernels/xgemm.opencl"
+ #include "../../kernels/level3/copy.opencl"
+ #include "../../kernels/level3/pad.opencl"
+ #include "../../kernels/level3/transpose.opencl"
+ #include "../../kernels/level3/padtranspose.opencl"
+ #include "../../kernels/level3/xgemm.opencl"
;
}
diff --git a/src/routines/level3/xherk.cc b/src/routines/level3/xherk.cc
index 5174e9ab..6a915c0b 100644
--- a/src/routines/level3/xherk.cc
+++ b/src/routines/level3/xherk.cc
@@ -27,14 +27,14 @@ template <> const Precision Xherk<double2,double>::precision_ = Precision::kComp
// Constructor: forwards to base class constructor
template <typename T, typename U>
-Xherk<T,U>::Xherk(Queue &queue, Event &event):
- Routine<T>(queue, event, "HERK", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) {
+Xherk<T,U>::Xherk(Queue &queue, Event &event, const std::string &name):
+ Routine<T>(queue, event, name, {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) {
source_string_ =
- #include "../../kernels/copy.opencl"
- #include "../../kernels/pad.opencl"
- #include "../../kernels/transpose.opencl"
- #include "../../kernels/padtranspose.opencl"
- #include "../../kernels/xgemm.opencl"
+ #include "../../kernels/level3/copy.opencl"
+ #include "../../kernels/level3/pad.opencl"
+ #include "../../kernels/level3/transpose.opencl"
+ #include "../../kernels/level3/padtranspose.opencl"
+ #include "../../kernels/level3/xgemm.opencl"
;
}
diff --git a/src/routines/level3/xsymm.cc b/src/routines/level3/xsymm.cc
index 37c08d3b..583d5c7d 100644
--- a/src/routines/level3/xsymm.cc
+++ b/src/routines/level3/xsymm.cc
@@ -21,8 +21,8 @@ namespace clblast {
// Constructor: forwards to base class constructor
template <typename T>
-Xsymm<T>::Xsymm(Queue &queue, Event &event):
- Xgemm<T>(queue, event) {
+Xsymm<T>::Xsymm(Queue &queue, Event &event, const std::string &name):
+ Xgemm<T>(queue, event, name) {
}
// =================================================================================================
diff --git a/src/routines/level3/xsyr2k.cc b/src/routines/level3/xsyr2k.cc
index b36e7c5e..de5f1afc 100644
--- a/src/routines/level3/xsyr2k.cc
+++ b/src/routines/level3/xsyr2k.cc
@@ -29,14 +29,14 @@ template <> const Precision Xsyr2k<double2>::precision_ = Precision::kComplexDou
// Constructor: forwards to base class constructor
template <typename T>
-Xsyr2k<T>::Xsyr2k(Queue &queue, Event &event):
- Routine<T>(queue, event, "SYR2K", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) {
+Xsyr2k<T>::Xsyr2k(Queue &queue, Event &event, const std::string &name):
+ Routine<T>(queue, event, name, {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) {
source_string_ =
- #include "../../kernels/copy.opencl"
- #include "../../kernels/pad.opencl"
- #include "../../kernels/transpose.opencl"
- #include "../../kernels/padtranspose.opencl"
- #include "../../kernels/xgemm.opencl"
+ #include "../../kernels/level3/copy.opencl"
+ #include "../../kernels/level3/pad.opencl"
+ #include "../../kernels/level3/transpose.opencl"
+ #include "../../kernels/level3/padtranspose.opencl"
+ #include "../../kernels/level3/xgemm.opencl"
;
}
diff --git a/src/routines/level3/xsyrk.cc b/src/routines/level3/xsyrk.cc
index e4668216..d8fc6357 100644
--- a/src/routines/level3/xsyrk.cc
+++ b/src/routines/level3/xsyrk.cc
@@ -29,14 +29,14 @@ template <> const Precision Xsyrk<double2>::precision_ = Precision::kComplexDoub
// Constructor: forwards to base class constructor
template <typename T>
-Xsyrk<T>::Xsyrk(Queue &queue, Event &event):
- Routine<T>(queue, event, "SYRK", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) {
+Xsyrk<T>::Xsyrk(Queue &queue, Event &event, const std::string &name):
+ Routine<T>(queue, event, name, {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) {
source_string_ =
- #include "../../kernels/copy.opencl"
- #include "../../kernels/pad.opencl"
- #include "../../kernels/transpose.opencl"
- #include "../../kernels/padtranspose.opencl"
- #include "../../kernels/xgemm.opencl"
+ #include "../../kernels/level3/copy.opencl"
+ #include "../../kernels/level3/pad.opencl"
+ #include "../../kernels/level3/transpose.opencl"
+ #include "../../kernels/level3/padtranspose.opencl"
+ #include "../../kernels/level3/xgemm.opencl"
;
}
diff --git a/src/routines/level3/xtrmm.cc b/src/routines/level3/xtrmm.cc
index 8be7d950..1180c026 100644
--- a/src/routines/level3/xtrmm.cc
+++ b/src/routines/level3/xtrmm.cc
@@ -21,8 +21,8 @@ namespace clblast {
// Constructor: forwards to base class constructor
template <typename T>
-Xtrmm<T>::Xtrmm(Queue &queue, Event &event):
- Xgemm<T>(queue, event) {
+Xtrmm<T>::Xtrmm(Queue &queue, Event &event, const std::string &name):
+ Xgemm<T>(queue, event, name) {
}
// =================================================================================================
diff --git a/src/tuning/copy.cc b/src/tuning/copy.cc
index 23828b25..e2837e60 100644
--- a/src/tuning/copy.cc
+++ b/src/tuning/copy.cc
@@ -31,7 +31,7 @@ class TuneCopy {
static std::string GetSources() {
return
#include "../src/kernels/common.opencl"
- #include "../src/kernels/copy.opencl"
+ #include "../src/kernels/level3/copy.opencl"
;
}
diff --git a/src/tuning/pad.cc b/src/tuning/pad.cc
index 6a826b6b..72729422 100644
--- a/src/tuning/pad.cc
+++ b/src/tuning/pad.cc
@@ -31,7 +31,7 @@ class TunePad {
static std::string GetSources() {
return
#include "../src/kernels/common.opencl"
- #include "../src/kernels/pad.opencl"
+ #include "../src/kernels/level3/pad.opencl"
;
}
diff --git a/src/tuning/padtranspose.cc b/src/tuning/padtranspose.cc
index 3f233809..5edd89e0 100644
--- a/src/tuning/padtranspose.cc
+++ b/src/tuning/padtranspose.cc
@@ -31,7 +31,7 @@ class TunePadTranspose {
static std::string GetSources() {
return
#include "../src/kernels/common.opencl"
- #include "../src/kernels/padtranspose.opencl"
+ #include "../src/kernels/level3/padtranspose.opencl"
;
}
diff --git a/src/tuning/transpose.cc b/src/tuning/transpose.cc
index 3998ba66..113e0a81 100644
--- a/src/tuning/transpose.cc
+++ b/src/tuning/transpose.cc
@@ -31,7 +31,7 @@ class TuneTranspose {
static std::string GetSources() {
return
#include "../src/kernels/common.opencl"
- #include "../src/kernels/transpose.opencl"
+ #include "../src/kernels/level3/transpose.opencl"
;
}
diff --git a/src/tuning/xgemm.cc b/src/tuning/xgemm.cc
index e820cfb0..c06e3e72 100644
--- a/src/tuning/xgemm.cc
+++ b/src/tuning/xgemm.cc
@@ -31,7 +31,7 @@ class TuneXgemm {
static std::string GetSources() {
return
#include "../src/kernels/common.opencl"
- #include "../src/kernels/xgemm.opencl"
+ #include "../src/kernels/level3/xgemm.opencl"
;
}