summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- include/internal/routine.h | 8
-rw-r--r-- src/clblast.cc | 147
-rw-r--r-- src/kernels/common.opencl | 2
-rw-r--r-- src/kernels/copy.opencl | 2
-rw-r--r-- src/kernels/pad.opencl | 2
-rw-r--r-- src/kernels/padtranspose.opencl | 2
-rw-r--r-- src/kernels/transpose.opencl | 2
-rw-r--r-- src/kernels/xaxpy.opencl | 2
-rw-r--r-- src/kernels/xgemm.opencl | 2
-rw-r--r-- src/kernels/xgemv.opencl | 2
-rw-r--r-- src/routine.cc | 10
-rw-r--r-- src/routines/level1/xaxpy.cc | 5
-rw-r--r-- src/routines/level2/xgemv.cc | 5
-rw-r--r-- src/routines/level3/xgemm.cc | 9
-rw-r--r-- src/routines/level3/xher2k.cc | 9
-rw-r--r-- src/routines/level3/xherk.cc | 9
-rw-r--r-- src/routines/level3/xsyr2k.cc | 9
-rw-r--r-- src/routines/level3/xsyrk.cc | 9
-rw-r--r-- src/tuning/copy.cc | 9
-rw-r--r-- src/tuning/pad.cc | 9
-rw-r--r-- src/tuning/padtranspose.cc | 9
-rw-r--r-- src/tuning/transpose.cc | 9
-rw-r--r-- src/tuning/xaxpy.cc | 9
-rw-r--r-- src/tuning/xgemm.cc | 9
-rw-r--r-- src/tuning/xgemv.cc | 9
25 files changed, 118 insertions, 181 deletions
diff --git a/include/internal/routine.h b/include/internal/routine.h
index 49a36c10..7b605d48 100644
--- a/include/internal/routine.h
+++ b/include/internal/routine.h
@@ -58,11 +58,11 @@ class Routine {
static constexpr bool ErrorIn(const StatusCode s) { return (s != StatusCode::kSuccess); }
// Base class constructor
- explicit Routine(CommandQueue &queue, Event &event,
+ explicit Routine(CommandQueue &queue, Event &event, const std::string &name,
const std::vector<std::string> &routines, const Precision precision);
// Set-up phase of the kernel
- StatusCode SetUp(const std::string &routine_source);
+ StatusCode SetUp();
protected:
@@ -107,6 +107,10 @@ class Routine {
// a derived class.
const Precision precision_;
+ // The routine's name and its kernel-source in string form
+ const std::string routine_name_;
+ std::string source_string_;
+
// The OpenCL objects, accessible only from derived classes
CommandQueue queue_;
Event event_;
diff --git a/src/clblast.cc b/src/clblast.cc
index b5d53ee6..6cb4086e 100644
--- a/src/clblast.cc
+++ b/src/clblast.cc
@@ -47,10 +47,8 @@ StatusCode Axpy(const size_t n, const T alpha,
auto event_cpp = Event(*event);
auto routine = Xaxpy<T>(queue_cpp, event_cpp);
- // Loads the kernel source-code as an include (C++11 raw string literal)
- std::string kernel_source =
- #include "kernels/xaxpy.opencl"
- auto status = routine.SetUp(kernel_source);
+ // Compiles the routine's device kernels
+ auto status = routine.SetUp();
if (status != StatusCode::kSuccess) { return status; }
// Runs the routine
@@ -91,10 +89,8 @@ StatusCode Gemv(const Layout layout, const Transpose a_transpose,
auto event_cpp = Event(*event);
auto routine = Xgemv<T>(queue_cpp, event_cpp);
- // Loads the kernel source-code as an include (C++11 raw string literal)
- std::string kernel_source =
- #include "kernels/xgemv.opencl"
- auto status = routine.SetUp(kernel_source);
+ // Compiles the routine's device kernels
+ auto status = routine.SetUp();
if (status != StatusCode::kSuccess) { return status; }
// Runs the routine
@@ -143,19 +139,8 @@ StatusCode Gemm(const Layout layout, const Transpose a_transpose, const Transpos
auto event_cpp = Event(*event);
auto routine = Xgemm<T>(queue_cpp, event_cpp);
- // Loads the kernel source-code as an include (C++11 raw string literal)
- std::string common_source1 =
- #include "kernels/copy.opencl"
- std::string common_source2 =
- #include "kernels/pad.opencl"
- std::string common_source3 =
- #include "kernels/transpose.opencl"
- std::string common_source4 =
- #include "kernels/padtranspose.opencl"
- std::string kernel_source =
- #include "kernels/xgemm.opencl"
- auto status = routine.SetUp(common_source1 + common_source2 + common_source3 + common_source4 +
- kernel_source);
+ // Compiles the routine's device kernels
+ auto status = routine.SetUp();
if (status != StatusCode::kSuccess) { return status; }
// Runs the routine
@@ -203,19 +188,8 @@ StatusCode Symm(const Layout layout, const Side side, const Triangle triangle,
auto event_cpp = Event(*event);
auto routine = Xsymm<T>(queue_cpp, event_cpp);
- // Loads the kernel source-code as an include (C++11 raw string literal)
- std::string common_source1 =
- #include "kernels/copy.opencl"
- std::string common_source2 =
- #include "kernels/pad.opencl"
- std::string common_source3 =
- #include "kernels/transpose.opencl"
- std::string common_source4 =
- #include "kernels/padtranspose.opencl"
- std::string kernel_source =
- #include "kernels/xgemm.opencl"
- auto status = routine.SetUp(common_source1 + common_source2 + common_source3 + common_source4 +
- kernel_source);
+ // Compiles the routine's device kernels
+ auto status = routine.SetUp();
if (status != StatusCode::kSuccess) { return status; }
// Runs the routine
@@ -263,19 +237,8 @@ StatusCode Hemm(const Layout layout, const Side side, const Triangle triangle,
auto event_cpp = Event(*event);
auto routine = Xhemm<T>(queue_cpp, event_cpp);
- // Loads the kernel source-code as an include (C++11 raw string literal)
- std::string common_source1 =
- #include "kernels/copy.opencl"
- std::string common_source2 =
- #include "kernels/pad.opencl"
- std::string common_source3 =
- #include "kernels/transpose.opencl"
- std::string common_source4 =
- #include "kernels/padtranspose.opencl"
- std::string kernel_source =
- #include "kernels/xgemm.opencl"
- auto status = routine.SetUp(common_source1 + common_source2 + common_source3 + common_source4 +
- kernel_source);
+ // Compiles the routine's device kernels
+ auto status = routine.SetUp();
if (status != StatusCode::kSuccess) { return status; }
// Runs the routine
@@ -310,19 +273,8 @@ StatusCode Syrk(const Layout layout, const Triangle triangle, const Transpose a_
auto event_cpp = Event(*event);
auto routine = Xsyrk<T>(queue_cpp, event_cpp);
- // Loads the kernel source-code as an include (C++11 raw string literal)
- std::string common_source1 =
- #include "kernels/copy.opencl"
- std::string common_source2 =
- #include "kernels/pad.opencl"
- std::string common_source3 =
- #include "kernels/transpose.opencl"
- std::string common_source4 =
- #include "kernels/padtranspose.opencl"
- std::string kernel_source =
- #include "kernels/xgemm.opencl"
- auto status = routine.SetUp(common_source1 + common_source2 + common_source3 + common_source4 +
- kernel_source);
+ // Compiles the routine's device kernels
+ auto status = routine.SetUp();
if (status != StatusCode::kSuccess) { return status; }
// Runs the routine
@@ -364,19 +316,8 @@ StatusCode Herk(const Layout layout, const Triangle triangle, const Transpose a_
auto event_cpp = Event(*event);
auto routine = Xherk<std::complex<T>,T>(queue_cpp, event_cpp);
- // Loads the kernel source-code as an include (C++11 raw string literal)
- std::string common_source1 =
- #include "kernels/copy.opencl"
- std::string common_source2 =
- #include "kernels/pad.opencl"
- std::string common_source3 =
- #include "kernels/transpose.opencl"
- std::string common_source4 =
- #include "kernels/padtranspose.opencl"
- std::string kernel_source =
- #include "kernels/xgemm.opencl"
- auto status = routine.SetUp(common_source1 + common_source2 + common_source3 + common_source4 +
- kernel_source);
+ // Compiles the routine's device kernels
+ auto status = routine.SetUp();
if (status != StatusCode::kSuccess) { return status; }
// Runs the routine
@@ -409,19 +350,8 @@ StatusCode Syr2k(const Layout layout, const Triangle triangle, const Transpose a
auto event_cpp = Event(*event);
auto routine = Xsyr2k<T>(queue_cpp, event_cpp);
- // Loads the kernel source-code as an include (C++11 raw string literal)
- std::string common_source1 =
- #include "kernels/copy.opencl"
- std::string common_source2 =
- #include "kernels/pad.opencl"
- std::string common_source3 =
- #include "kernels/transpose.opencl"
- std::string common_source4 =
- #include "kernels/padtranspose.opencl"
- std::string kernel_source =
- #include "kernels/xgemm.opencl"
- auto status = routine.SetUp(common_source1 + common_source2 + common_source3 + common_source4 +
- kernel_source);
+ // Compiles the routine's device kernels
+ auto status = routine.SetUp();
if (status != StatusCode::kSuccess) { return status; }
// Runs the routine
@@ -469,19 +399,8 @@ StatusCode Her2k(const Layout layout, const Triangle triangle, const Transpose a
auto event_cpp = Event(*event);
auto routine = Xher2k<T,U>(queue_cpp, event_cpp);
- // Loads the kernel source-code as an include (C++11 raw string literal)
- std::string common_source1 =
- #include "kernels/copy.opencl"
- std::string common_source2 =
- #include "kernels/pad.opencl"
- std::string common_source3 =
- #include "kernels/transpose.opencl"
- std::string common_source4 =
- #include "kernels/padtranspose.opencl"
- std::string kernel_source =
- #include "kernels/xgemm.opencl"
- auto status = routine.SetUp(common_source1 + common_source2 + common_source3 + common_source4 +
- kernel_source);
+ // Compiles the routine's device kernels
+ auto status = routine.SetUp();
if (status != StatusCode::kSuccess) { return status; }
// Runs the routine
@@ -518,19 +437,8 @@ StatusCode Trmm(const Layout layout, const Side side, const Triangle triangle,
auto event_cpp = Event(*event);
auto routine = Xtrmm<T>(queue_cpp, event_cpp);
- // Loads the kernel source-code as an include (C++11 raw string literal)
- std::string common_source1 =
- #include "kernels/copy.opencl"
- std::string common_source2 =
- #include "kernels/pad.opencl"
- std::string common_source3 =
- #include "kernels/transpose.opencl"
- std::string common_source4 =
- #include "kernels/padtranspose.opencl"
- std::string kernel_source =
- #include "kernels/xgemm.opencl"
- auto status = routine.SetUp(common_source1 + common_source2 + common_source3 + common_source4 +
- kernel_source);
+ // Compiles the routine's device kernels
+ auto status = routine.SetUp();
if (status != StatusCode::kSuccess) { return status; }
// Runs the routine
@@ -579,19 +487,8 @@ StatusCode Trsm(const Layout layout, const Side side, const Triangle triangle,
auto event_cpp = Event(*event);
auto routine = Xtrsm<T>(queue_cpp, event_cpp);
- // Loads the kernel source-code as an include (C++11 raw string literal)
- std::string common_source1 =
- #include "kernels/copy.opencl"
- std::string common_source2 =
- #include "kernels/pad.opencl"
- std::string common_source3 =
- #include "kernels/transpose.opencl"
- std::string common_source4 =
- #include "kernels/padtranspose.opencl"
- std::string kernel_source =
- #include "kernels/xgemm.opencl"
- auto status = routine.SetUp(common_source1 + common_source2 + common_source3 + common_source4 +
- kernel_source);
+ // Compiles the routine's device kernels
+ auto status = routine.SetUp();
if (status != StatusCode::kSuccess) { return status; }
// Runs the routine
diff --git a/src/kernels/common.opencl b/src/kernels/common.opencl
index 2e1d8f90..12d63b99 100644
--- a/src/kernels/common.opencl
+++ b/src/kernels/common.opencl
@@ -143,6 +143,6 @@ R"(
// =================================================================================================
// End of the C++11 raw string literal
-)";
+)"
// =================================================================================================
diff --git a/src/kernels/copy.opencl b/src/kernels/copy.opencl
index f95b476b..7dde688b 100644
--- a/src/kernels/copy.opencl
+++ b/src/kernels/copy.opencl
@@ -68,6 +68,6 @@ __kernel void CopyMatrix(const int ld,
// =================================================================================================
// End of the C++11 raw string literal
-)";
+)"
// =================================================================================================
diff --git a/src/kernels/pad.opencl b/src/kernels/pad.opencl
index 2791db30..69324f20 100644
--- a/src/kernels/pad.opencl
+++ b/src/kernels/pad.opencl
@@ -344,6 +344,6 @@ __kernel void TrmmUpperToSquared(const int src_dim,
// =================================================================================================
// End of the C++11 raw string literal
-)";
+)"
// =================================================================================================
diff --git a/src/kernels/padtranspose.opencl b/src/kernels/padtranspose.opencl
index b2b96aa0..a6b70f0b 100644
--- a/src/kernels/padtranspose.opencl
+++ b/src/kernels/padtranspose.opencl
@@ -159,6 +159,6 @@ __kernel void UnPadTransposeMatrix(const int src_one, const int src_two,
// =================================================================================================
// End of the C++11 raw string literal
-)";
+)"
// =================================================================================================
diff --git a/src/kernels/transpose.opencl b/src/kernels/transpose.opencl
index 79ab1688..1b369343 100644
--- a/src/kernels/transpose.opencl
+++ b/src/kernels/transpose.opencl
@@ -163,6 +163,6 @@ __kernel void TransposeMatrix(const int ld,
// =================================================================================================
// End of the C++11 raw string literal
-)";
+)"
// =================================================================================================
diff --git a/src/kernels/xaxpy.opencl b/src/kernels/xaxpy.opencl
index 40c6c3bd..b7ffe9ff 100644
--- a/src/kernels/xaxpy.opencl
+++ b/src/kernels/xaxpy.opencl
@@ -123,6 +123,6 @@ __kernel void XaxpyFast(const int n, const real alpha,
// =================================================================================================
// End of the C++11 raw string literal
-)";
+)"
// =================================================================================================
diff --git a/src/kernels/xgemm.opencl b/src/kernels/xgemm.opencl
index b888f3ef..b689fa1e 100644
--- a/src/kernels/xgemm.opencl
+++ b/src/kernels/xgemm.opencl
@@ -675,6 +675,6 @@ __kernel void XgemmLower(const int kSizeN, const int kSizeK,
// =================================================================================================
// End of the C++11 raw string literal
-)";
+)"
// =================================================================================================
diff --git a/src/kernels/xgemv.opencl b/src/kernels/xgemv.opencl
index 4bb69090..65061717 100644
--- a/src/kernels/xgemv.opencl
+++ b/src/kernels/xgemv.opencl
@@ -368,6 +368,6 @@ __kernel void XgemvFastRot(const int m, const int n, const real alpha, const rea
// =================================================================================================
// End of the C++11 raw string literal
-)";
+)"
// =================================================================================================
diff --git a/src/routine.cc b/src/routine.cc
index 27bfa8f9..eb5c5034 100644
--- a/src/routine.cc
+++ b/src/routine.cc
@@ -22,9 +22,10 @@ namespace clblast {
std::vector<Routine::ProgramCache> Routine::program_cache_;
// Constructor: not much here, because no status codes can be returned
-Routine::Routine(CommandQueue &queue, Event &event,
+Routine::Routine(CommandQueue &queue, Event &event, const std::string &name,
const std::vector<std::string> &routines, const Precision precision):
precision_(precision),
+ routine_name_(name),
queue_(queue),
event_(event),
context_(queue_.GetContext()),
@@ -40,7 +41,7 @@ Routine::Routine(CommandQueue &queue, Event &event,
// =================================================================================================
// Separate set-up function to allow for status codes to be returned
-StatusCode Routine::SetUp(const std::string &routine_source) {
+StatusCode Routine::SetUp() {
// Queries the cache to see whether or not the compiled kernel is already there. If not, it will
// be built and added to the cache.
@@ -63,7 +64,8 @@ StatusCode Routine::SetUp(const std::string &routine_source) {
// Loads the common header (typedefs and defines and such)
std::string common_header =
- #include "kernels/common.opencl"
+ #include "kernels/common.opencl"
+ ;
// Collects the parameters for this device in the form of defines, and adds the precision
auto defines = db_.GetDefines();
@@ -76,7 +78,7 @@ StatusCode Routine::SetUp(const std::string &routine_source) {
}
// Combines everything together into a single source string
- auto source_string = defines + common_header + routine_source;
+ auto source_string = defines + common_header + source_string_;
// Compiles the kernel
try {
diff --git a/src/routines/level1/xaxpy.cc b/src/routines/level1/xaxpy.cc
index fba36851..ed680856 100644
--- a/src/routines/level1/xaxpy.cc
+++ b/src/routines/level1/xaxpy.cc
@@ -30,7 +30,10 @@ template <> const Precision Xaxpy<double2>::precision_ = Precision::kComplexDoub
// Constructor: forwards to base class constructor
template <typename T>
Xaxpy<T>::Xaxpy(CommandQueue &queue, Event &event):
- Routine(queue, event, {"Xaxpy"}, precision_) {
+ Routine(queue, event, "Xaxpy", {"Xaxpy"}, precision_) {
+ source_string_ =
+ #include "../../kernels/xaxpy.opencl"
+ ;
}
// =================================================================================================
diff --git a/src/routines/level2/xgemv.cc b/src/routines/level2/xgemv.cc
index 181337b6..22bbb7ea 100644
--- a/src/routines/level2/xgemv.cc
+++ b/src/routines/level2/xgemv.cc
@@ -30,7 +30,10 @@ template <> const Precision Xgemv<double2>::precision_ = Precision::kComplexDoub
// Constructor: forwards to base class constructor
template <typename T>
Xgemv<T>::Xgemv(CommandQueue &queue, Event &event):
- Routine(queue, event, {"Xgemv"}, precision_) {
+ Routine(queue, event, "Xgemv", {"Xgemv"}, precision_) {
+ source_string_ =
+ #include "../../kernels/xgemv.opencl"
+ ;
}
// =================================================================================================
diff --git a/src/routines/level3/xgemm.cc b/src/routines/level3/xgemm.cc
index 950a8550..13ffafbb 100644
--- a/src/routines/level3/xgemm.cc
+++ b/src/routines/level3/xgemm.cc
@@ -30,7 +30,14 @@ template <> const Precision Xgemm<double2>::precision_ = Precision::kComplexDoub
// Constructor: forwards to base class constructor
template <typename T>
Xgemm<T>::Xgemm(CommandQueue &queue, Event &event):
- Routine(queue, event, {"Copy", "Pad", "Transpose", "PadTranspose", "Xgemm"}, precision_) {
+ Routine(queue, event, "Xgemm", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) {
+ source_string_ =
+ #include "../../kernels/copy.opencl"
+ #include "../../kernels/pad.opencl"
+ #include "../../kernels/transpose.opencl"
+ #include "../../kernels/padtranspose.opencl"
+ #include "../../kernels/xgemm.opencl"
+ ;
}
// =================================================================================================
diff --git a/src/routines/level3/xher2k.cc b/src/routines/level3/xher2k.cc
index 45793ca7..b4291c1e 100644
--- a/src/routines/level3/xher2k.cc
+++ b/src/routines/level3/xher2k.cc
@@ -28,7 +28,14 @@ template <> const Precision Xher2k<double2,double>::precision_ = Precision::kCom
// Constructor: forwards to base class constructor
template <typename T, typename U>
Xher2k<T,U>::Xher2k(CommandQueue &queue, Event &event):
- Routine(queue, event, {"Copy", "Pad", "Transpose", "PadTranspose", "Xgemm"}, precision_) {
+ Routine(queue, event, "Xher2k", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) {
+ source_string_ =
+ #include "../../kernels/copy.opencl"
+ #include "../../kernels/pad.opencl"
+ #include "../../kernels/transpose.opencl"
+ #include "../../kernels/padtranspose.opencl"
+ #include "../../kernels/xgemm.opencl"
+ ;
}
// =================================================================================================
diff --git a/src/routines/level3/xherk.cc b/src/routines/level3/xherk.cc
index eaa8861b..4b16d8f7 100644
--- a/src/routines/level3/xherk.cc
+++ b/src/routines/level3/xherk.cc
@@ -28,7 +28,14 @@ template <> const Precision Xherk<double2,double>::precision_ = Precision::kComp
// Constructor: forwards to base class constructor
template <typename T, typename U>
Xherk<T,U>::Xherk(CommandQueue &queue, Event &event):
- Routine(queue, event, {"Copy", "Pad", "Transpose", "PadTranspose", "Xgemm"}, precision_) {
+ Routine(queue, event, "Xherk", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) {
+ source_string_ =
+ #include "../../kernels/copy.opencl"
+ #include "../../kernels/pad.opencl"
+ #include "../../kernels/transpose.opencl"
+ #include "../../kernels/padtranspose.opencl"
+ #include "../../kernels/xgemm.opencl"
+ ;
}
// =================================================================================================
diff --git a/src/routines/level3/xsyr2k.cc b/src/routines/level3/xsyr2k.cc
index 66370827..6483629c 100644
--- a/src/routines/level3/xsyr2k.cc
+++ b/src/routines/level3/xsyr2k.cc
@@ -30,7 +30,14 @@ template <> const Precision Xsyr2k<double2>::precision_ = Precision::kComplexDou
// Constructor: forwards to base class constructor
template <typename T>
Xsyr2k<T>::Xsyr2k(CommandQueue &queue, Event &event):
- Routine(queue, event, {"Copy", "Pad", "Transpose", "PadTranspose", "Xgemm"}, precision_) {
+ Routine(queue, event, "Xsyr2k", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) {
+ source_string_ =
+ #include "../../kernels/copy.opencl"
+ #include "../../kernels/pad.opencl"
+ #include "../../kernels/transpose.opencl"
+ #include "../../kernels/padtranspose.opencl"
+ #include "../../kernels/xgemm.opencl"
+ ;
}
// =================================================================================================
diff --git a/src/routines/level3/xsyrk.cc b/src/routines/level3/xsyrk.cc
index 0bafe703..5cc1cbec 100644
--- a/src/routines/level3/xsyrk.cc
+++ b/src/routines/level3/xsyrk.cc
@@ -30,7 +30,14 @@ template <> const Precision Xsyrk<double2>::precision_ = Precision::kComplexDoub
// Constructor: forwards to base class constructor
template <typename T>
Xsyrk<T>::Xsyrk(CommandQueue &queue, Event &event):
- Routine(queue, event, {"Copy", "Pad", "Transpose", "PadTranspose", "Xgemm"}, precision_) {
+ Routine(queue, event, "Xsyrk", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) {
+ source_string_ =
+ #include "../../kernels/copy.opencl"
+ #include "../../kernels/pad.opencl"
+ #include "../../kernels/transpose.opencl"
+ #include "../../kernels/padtranspose.opencl"
+ #include "../../kernels/xgemm.opencl"
+ ;
}
// =================================================================================================
diff --git a/src/tuning/copy.cc b/src/tuning/copy.cc
index da223bf0..125b076e 100644
--- a/src/tuning/copy.cc
+++ b/src/tuning/copy.cc
@@ -30,11 +30,10 @@ void CopyTune(const Arguments<T> &args,
// This points to the CopyMatrix kernel as found in the CLBlast library. This is just one example
// of a copy kernel. However, all copy-kernels use the same tuning parameters, so one has to be
// chosen as a representative.
- std::string common_source =
- #include "../src/kernels/common.opencl"
- std::string kernel_source =
- #include "../src/kernels/copy.opencl"
- auto sources = common_source + kernel_source;
+ std::string sources =
+ #include "../src/kernels/common.opencl"
+ #include "../src/kernels/copy.opencl"
+ ;
auto id = tuner.AddKernelFromString(sources, "CopyMatrix", {args.m, args.n}, {1, 1});
tuner.SetReferenceFromString(sources, "CopyMatrix", {args.m, args.n}, {8, 8});
diff --git a/src/tuning/pad.cc b/src/tuning/pad.cc
index b6254cd5..584415c7 100644
--- a/src/tuning/pad.cc
+++ b/src/tuning/pad.cc
@@ -30,11 +30,10 @@ void PadTune(const Arguments<T> &args,
// This points to the PadMatrix kernel as found in the CLBlast library. This is just one
// example of a pad kernel. However, all pad-kernels use the same tuning parameters, so one has
// to be chosen as a representative.
- std::string common_source =
- #include "../src/kernels/common.opencl"
- std::string kernel_source =
- #include "../src/kernels/pad.opencl"
- auto sources = common_source + kernel_source;
+ std::string sources =
+ #include "../src/kernels/common.opencl"
+ #include "../src/kernels/pad.opencl"
+ ;
auto id = tuner.AddKernelFromString(sources, "PadMatrix", {args.m, args.n}, {1, 1});
tuner.SetReferenceFromString(sources, "PadMatrix", {args.m, args.n}, {8, 8});
diff --git a/src/tuning/padtranspose.cc b/src/tuning/padtranspose.cc
index c84e5950..25044556 100644
--- a/src/tuning/padtranspose.cc
+++ b/src/tuning/padtranspose.cc
@@ -30,11 +30,10 @@ void PadTransposeTune(const Arguments<T> &args,
// This points to the PadTransposeMatrix kernel as found in the CLBlast library. This is just one
// example of a transpose kernel. However, all kernels use the same tuning parameters, so one has
// to be chosen as a representative.
- std::string common_source =
- #include "../src/kernels/common.opencl"
- std::string kernel_source =
- #include "../src/kernels/padtranspose.opencl"
- auto sources = common_source + kernel_source;
+ std::string sources =
+ #include "../src/kernels/common.opencl"
+ #include "../src/kernels/padtranspose.opencl"
+ ;
auto id = tuner.AddKernelFromString(sources, "PadTransposeMatrix", {args.m, args.n}, {1, 1});
tuner.SetReferenceFromString(sources, "PadTransposeMatrix", {args.m, args.n}, {8, 8});
diff --git a/src/tuning/transpose.cc b/src/tuning/transpose.cc
index 90392866..46756599 100644
--- a/src/tuning/transpose.cc
+++ b/src/tuning/transpose.cc
@@ -30,11 +30,10 @@ void TransposeTune(const Arguments<T> &args,
// This points to the PadTransposeMatrix kernel as found in the CLBlast library. This is just one
// example of a transpose kernel. However, all kernels use the same tuning parameters, so one has
// to be chosen as a representative.
- std::string common_source =
- #include "../src/kernels/common.opencl"
- std::string kernel_source =
- #include "../src/kernels/transpose.opencl"
- auto sources = common_source + kernel_source;
+ std::string sources =
+ #include "../src/kernels/common.opencl"
+ #include "../src/kernels/transpose.opencl"
+ ;
auto id = tuner.AddKernelFromString(sources, "TransposeMatrix", {args.m, args.n}, {1, 1});
tuner.SetReferenceFromString(sources, "TransposeMatrix", {args.m, args.n}, {8, 8});
diff --git a/src/tuning/xaxpy.cc b/src/tuning/xaxpy.cc
index 0439ed05..20b5978e 100644
--- a/src/tuning/xaxpy.cc
+++ b/src/tuning/xaxpy.cc
@@ -34,11 +34,10 @@ void XaxpyTune(const Arguments<T> &args,
}
// This points to the XaxpyFast kernel as found in the CLBlast library
- std::string common_source =
- #include "../src/kernels/common.opencl"
- std::string kernel_source =
- #include "../src/kernels/xaxpy.opencl"
- auto sources = common_source + kernel_source;
+ std::string sources =
+ #include "../src/kernels/common.opencl"
+ #include "../src/kernels/xaxpy.opencl"
+ ;
auto id = tuner.AddKernelFromString(sources, "XaxpyFast", {args.n}, {1});
tuner.SetReferenceFromString(sources, "XaxpyFast", {args.n}, {64});
diff --git a/src/tuning/xgemm.cc b/src/tuning/xgemm.cc
index aba56810..3fe58ed5 100644
--- a/src/tuning/xgemm.cc
+++ b/src/tuning/xgemm.cc
@@ -30,11 +30,10 @@ void XgemmTune(const Arguments<T> &args,
cltune::Tuner &tuner) {
// This points to the Xgemm kernel as found in the CLBlast library and its golden reference
- std::string common_source =
- #include "../src/kernels/common.opencl"
- std::string kernel_source =
- #include "../src/kernels/xgemm.opencl"
- auto sources = common_source + kernel_source;
+ std::string sources =
+ #include "../src/kernels/common.opencl"
+ #include "../src/kernels/xgemm.opencl"
+ ;
auto id = tuner.AddKernelFromString(sources, "Xgemm", {args.m, args.n}, {1, 1});
tuner.SetReferenceFromString(sources, "Xgemm", {args.m, args.n}, {8, 8});
diff --git a/src/tuning/xgemv.cc b/src/tuning/xgemv.cc
index 48df6f25..a9d88e4b 100644
--- a/src/tuning/xgemv.cc
+++ b/src/tuning/xgemv.cc
@@ -36,11 +36,10 @@ void XgemvTune(const Arguments<T> &args, const size_t variation,
auto a_rotated = (variation == 3) ? 1 : 0;
// This points to the Xgemv kernel as found in the CLBlast library
- std::string common_source =
- #include "../src/kernels/common.opencl"
- std::string kernel_source =
- #include "../src/kernels/xgemv.opencl"
- auto sources = common_source + kernel_source;
+ std::string sources =
+ #include "../src/kernels/common.opencl"
+ #include "../src/kernels/xgemv.opencl"
+ ;
auto id = tuner.AddKernelFromString(sources, kernel_name, {args.m}, {1});
tuner.SetReferenceFromString(sources, "Xgemv", {args.m}, {64});