diff options
-rw-r--r-- | include/internal/routine.h | 8 | ||||
-rw-r--r-- | src/clblast.cc | 147 | ||||
-rw-r--r-- | src/kernels/common.opencl | 2 | ||||
-rw-r--r-- | src/kernels/copy.opencl | 2 | ||||
-rw-r--r-- | src/kernels/pad.opencl | 2 | ||||
-rw-r--r-- | src/kernels/padtranspose.opencl | 2 | ||||
-rw-r--r-- | src/kernels/transpose.opencl | 2 | ||||
-rw-r--r-- | src/kernels/xaxpy.opencl | 2 | ||||
-rw-r--r-- | src/kernels/xgemm.opencl | 2 | ||||
-rw-r--r-- | src/kernels/xgemv.opencl | 2 | ||||
-rw-r--r-- | src/routine.cc | 10 | ||||
-rw-r--r-- | src/routines/level1/xaxpy.cc | 5 | ||||
-rw-r--r-- | src/routines/level2/xgemv.cc | 5 | ||||
-rw-r--r-- | src/routines/level3/xgemm.cc | 9 | ||||
-rw-r--r-- | src/routines/level3/xher2k.cc | 9 | ||||
-rw-r--r-- | src/routines/level3/xherk.cc | 9 | ||||
-rw-r--r-- | src/routines/level3/xsyr2k.cc | 9 | ||||
-rw-r--r-- | src/routines/level3/xsyrk.cc | 9 | ||||
-rw-r--r-- | src/tuning/copy.cc | 9 | ||||
-rw-r--r-- | src/tuning/pad.cc | 9 | ||||
-rw-r--r-- | src/tuning/padtranspose.cc | 9 | ||||
-rw-r--r-- | src/tuning/transpose.cc | 9 | ||||
-rw-r--r-- | src/tuning/xaxpy.cc | 9 | ||||
-rw-r--r-- | src/tuning/xgemm.cc | 9 | ||||
-rw-r--r-- | src/tuning/xgemv.cc | 9 |
25 files changed, 118 insertions, 181 deletions
diff --git a/include/internal/routine.h b/include/internal/routine.h index 49a36c10..7b605d48 100644 --- a/include/internal/routine.h +++ b/include/internal/routine.h @@ -58,11 +58,11 @@ class Routine { static constexpr bool ErrorIn(const StatusCode s) { return (s != StatusCode::kSuccess); } // Base class constructor - explicit Routine(CommandQueue &queue, Event &event, + explicit Routine(CommandQueue &queue, Event &event, const std::string &name, const std::vector<std::string> &routines, const Precision precision); // Set-up phase of the kernel - StatusCode SetUp(const std::string &routine_source); + StatusCode SetUp(); protected: @@ -107,6 +107,10 @@ class Routine { // a derived class. const Precision precision_; + // The routine's name and its kernel-source in string form + const std::string routine_name_; + std::string source_string_; + // The OpenCL objects, accessible only from derived classes CommandQueue queue_; Event event_; diff --git a/src/clblast.cc b/src/clblast.cc index b5d53ee6..6cb4086e 100644 --- a/src/clblast.cc +++ b/src/clblast.cc @@ -47,10 +47,8 @@ StatusCode Axpy(const size_t n, const T alpha, auto event_cpp = Event(*event); auto routine = Xaxpy<T>(queue_cpp, event_cpp); - // Loads the kernel source-code as an include (C++11 raw string literal) - std::string kernel_source = - #include "kernels/xaxpy.opencl" - auto status = routine.SetUp(kernel_source); + // Compiles the routine's device kernels + auto status = routine.SetUp(); if (status != StatusCode::kSuccess) { return status; } // Runs the routine @@ -91,10 +89,8 @@ StatusCode Gemv(const Layout layout, const Transpose a_transpose, auto event_cpp = Event(*event); auto routine = Xgemv<T>(queue_cpp, event_cpp); - // Loads the kernel source-code as an include (C++11 raw string literal) - std::string kernel_source = - #include "kernels/xgemv.opencl" - auto status = routine.SetUp(kernel_source); + // Compiles the routine's device kernels + auto status = routine.SetUp(); if (status != StatusCode::kSuccess) { return status; } // Runs the routine @@ -143,19 +139,8 @@ StatusCode Gemm(const Layout layout, const Transpose a_transpose, const Transpos auto event_cpp = Event(*event); auto routine = Xgemm<T>(queue_cpp, event_cpp); - // Loads the kernel source-code as an include (C++11 raw string literal) - std::string common_source1 = - #include "kernels/copy.opencl" - std::string common_source2 = - #include "kernels/pad.opencl" - std::string common_source3 = - #include "kernels/transpose.opencl" - std::string common_source4 = - #include "kernels/padtranspose.opencl" - std::string kernel_source = - #include "kernels/xgemm.opencl" - auto status = routine.SetUp(common_source1 + common_source2 + common_source3 + common_source4 + - kernel_source); + // Compiles the routine's device kernels + auto status = routine.SetUp(); if (status != StatusCode::kSuccess) { return status; } // Runs the routine @@ -203,19 +188,8 @@ StatusCode Symm(const Layout layout, const Side side, const Triangle triangle, auto event_cpp = Event(*event); auto routine = Xsymm<T>(queue_cpp, event_cpp); - // Loads the kernel source-code as an include (C++11 raw string literal) - std::string common_source1 = - #include "kernels/copy.opencl" - std::string common_source2 = - #include "kernels/pad.opencl" - std::string common_source3 = - #include "kernels/transpose.opencl" - std::string common_source4 = - #include "kernels/padtranspose.opencl" - std::string kernel_source = - #include "kernels/xgemm.opencl" - auto status = routine.SetUp(common_source1 + common_source2 + common_source3 + common_source4 + - kernel_source); + // Compiles the routine's device kernels + auto status = routine.SetUp(); if (status != StatusCode::kSuccess) { return status; } // Runs the routine @@ -263,19 +237,8 @@ StatusCode Hemm(const Layout layout, const Side side, const Triangle triangle, auto event_cpp = Event(*event); auto routine = Xhemm<T>(queue_cpp, event_cpp); - // Loads the kernel source-code as an include (C++11 raw string literal) - std::string common_source1 = - #include "kernels/copy.opencl" - std::string common_source2 = - #include "kernels/pad.opencl" - std::string common_source3 = - #include "kernels/transpose.opencl" - std::string common_source4 = - #include "kernels/padtranspose.opencl" - std::string kernel_source = - #include "kernels/xgemm.opencl" - auto status = routine.SetUp(common_source1 + common_source2 + common_source3 + common_source4 + - kernel_source); + // Compiles the routine's device kernels + auto status = routine.SetUp(); if (status != StatusCode::kSuccess) { return status; } // Runs the routine @@ -310,19 +273,8 @@ StatusCode Syrk(const Layout layout, const Triangle triangle, const Transpose a_ auto event_cpp = Event(*event); auto routine = Xsyrk<T>(queue_cpp, event_cpp); - // Loads the kernel source-code as an include (C++11 raw string literal) - std::string common_source1 = - #include "kernels/copy.opencl" - std::string common_source2 = - #include "kernels/pad.opencl" - std::string common_source3 = - #include "kernels/transpose.opencl" - std::string common_source4 = - #include "kernels/padtranspose.opencl" - std::string kernel_source = - #include "kernels/xgemm.opencl" - auto status = routine.SetUp(common_source1 + common_source2 + common_source3 + common_source4 + - kernel_source); + // Compiles the routine's device kernels + auto status = routine.SetUp(); if (status != StatusCode::kSuccess) { return status; } // Runs the routine @@ -364,19 +316,8 @@ StatusCode Herk(const Layout layout, const Triangle triangle, const Transpose a_ auto event_cpp = Event(*event); auto routine = Xherk<std::complex<T>,T>(queue_cpp, event_cpp); - // Loads the kernel source-code as an include (C++11 raw string literal) - std::string common_source1 = - #include "kernels/copy.opencl" - std::string common_source2 = - #include "kernels/pad.opencl" - std::string common_source3 = - #include "kernels/transpose.opencl" - std::string common_source4 = - #include "kernels/padtranspose.opencl" - std::string kernel_source = - #include "kernels/xgemm.opencl" - auto status = routine.SetUp(common_source1 + common_source2 + common_source3 + common_source4 + - kernel_source); + // Compiles the routine's device kernels + auto status = routine.SetUp(); if (status != StatusCode::kSuccess) { return status; } // Runs the routine @@ -409,19 +350,8 @@ StatusCode Syr2k(const Layout layout, const Triangle triangle, const Transpose a auto event_cpp = Event(*event); auto routine = Xsyr2k<T>(queue_cpp, event_cpp); - // Loads the kernel source-code as an include (C++11 raw string literal) - std::string common_source1 = - #include "kernels/copy.opencl" - std::string common_source2 = - #include "kernels/pad.opencl" - std::string common_source3 = - #include "kernels/transpose.opencl" - std::string common_source4 = - #include "kernels/padtranspose.opencl" - std::string kernel_source = - #include "kernels/xgemm.opencl" - auto status = routine.SetUp(common_source1 + common_source2 + common_source3 + common_source4 + - kernel_source); + // Compiles the routine's device kernels + auto status = routine.SetUp(); if (status != StatusCode::kSuccess) { return status; } // Runs the routine @@ -469,19 +399,8 @@ StatusCode Her2k(const Layout layout, const Triangle triangle, const Transpose a auto event_cpp = Event(*event); auto routine = Xher2k<T,U>(queue_cpp, event_cpp); - // Loads the kernel source-code as an include (C++11 raw string literal) - std::string common_source1 = - #include "kernels/copy.opencl" - std::string common_source2 = - #include "kernels/pad.opencl" - std::string common_source3 = - #include "kernels/transpose.opencl" - std::string common_source4 = - #include "kernels/padtranspose.opencl" - std::string kernel_source = - #include "kernels/xgemm.opencl" - auto status = routine.SetUp(common_source1 + common_source2 + common_source3 + common_source4 + - kernel_source); + // Compiles the routine's device kernels + auto status = routine.SetUp(); if (status != StatusCode::kSuccess) { return status; } // Runs the routine @@ -518,19 +437,8 @@ StatusCode Trmm(const Layout layout, const Side side, const Triangle triangle, auto event_cpp = Event(*event); auto routine = Xtrmm<T>(queue_cpp, event_cpp); - // Loads the kernel source-code as an include (C++11 raw string literal) - std::string common_source1 = - #include "kernels/copy.opencl" - std::string common_source2 = - #include "kernels/pad.opencl" - std::string common_source3 = - #include "kernels/transpose.opencl" - std::string common_source4 = - #include "kernels/padtranspose.opencl" - std::string kernel_source = - #include "kernels/xgemm.opencl" - auto status = routine.SetUp(common_source1 + common_source2 + common_source3 + common_source4 + - kernel_source); + // Compiles the routine's device kernels + auto status = routine.SetUp(); if (status != StatusCode::kSuccess) { return status; } // Runs the routine @@ -579,19 +487,8 @@ StatusCode Trsm(const Layout layout, const Side side, const Triangle triangle, auto event_cpp = Event(*event); auto routine = Xtrsm<T>(queue_cpp, event_cpp); - // Loads the kernel source-code as an include (C++11 raw string literal) - std::string common_source1 = - #include "kernels/copy.opencl" - std::string common_source2 = - #include "kernels/pad.opencl" - std::string common_source3 = - #include "kernels/transpose.opencl" - std::string common_source4 = - #include "kernels/padtranspose.opencl" - std::string kernel_source = - #include "kernels/xgemm.opencl" - auto status = routine.SetUp(common_source1 + common_source2 + common_source3 + common_source4 + - kernel_source); + // Compiles the routine's device kernels + auto status = routine.SetUp(); if (status != StatusCode::kSuccess) { return status; } // Runs the routine diff --git a/src/kernels/common.opencl b/src/kernels/common.opencl index 2e1d8f90..12d63b99 100644 --- a/src/kernels/common.opencl +++ b/src/kernels/common.opencl @@ -143,6 +143,6 @@ R"( // ================================================================================================= // End of the C++11 raw string literal -)"; +)" // ================================================================================================= diff --git a/src/kernels/copy.opencl b/src/kernels/copy.opencl index f95b476b..7dde688b 100644 --- a/src/kernels/copy.opencl +++ b/src/kernels/copy.opencl @@ -68,6 +68,6 @@ __kernel void CopyMatrix(const int ld, // ================================================================================================= // End of the C++11 raw string literal -)"; +)" // ================================================================================================= diff --git a/src/kernels/pad.opencl b/src/kernels/pad.opencl index 2791db30..69324f20 100644 --- a/src/kernels/pad.opencl +++ b/src/kernels/pad.opencl @@ -344,6 +344,6 @@ __kernel void TrmmUpperToSquared(const int src_dim, // ================================================================================================= // End of the C++11 raw string literal -)"; +)" // ================================================================================================= diff --git a/src/kernels/padtranspose.opencl b/src/kernels/padtranspose.opencl index b2b96aa0..a6b70f0b 100644 --- a/src/kernels/padtranspose.opencl +++ b/src/kernels/padtranspose.opencl @@ -159,6 +159,6 @@ __kernel void UnPadTransposeMatrix(const int src_one, const int src_two, // ================================================================================================= // End of the C++11 raw string literal -)"; +)" // ================================================================================================= diff --git a/src/kernels/transpose.opencl b/src/kernels/transpose.opencl index 79ab1688..1b369343 100644 --- a/src/kernels/transpose.opencl +++ b/src/kernels/transpose.opencl @@ -163,6 +163,6 @@ __kernel void TransposeMatrix(const int ld, // ================================================================================================= // End of the C++11 raw string literal -)"; +)" // ================================================================================================= diff --git a/src/kernels/xaxpy.opencl b/src/kernels/xaxpy.opencl index 40c6c3bd..b7ffe9ff 100644 --- a/src/kernels/xaxpy.opencl +++ b/src/kernels/xaxpy.opencl @@ -123,6 +123,6 @@ __kernel void XaxpyFast(const int n, const real alpha, // ================================================================================================= // End of the C++11 raw string literal -)"; +)" // ================================================================================================= diff --git a/src/kernels/xgemm.opencl b/src/kernels/xgemm.opencl index b888f3ef..b689fa1e 100644 --- a/src/kernels/xgemm.opencl +++ b/src/kernels/xgemm.opencl @@ -675,6 +675,6 @@ __kernel void XgemmLower(const int kSizeN, const int kSizeK, // ================================================================================================= // End of the C++11 raw string literal -)"; +)" // ================================================================================================= diff --git a/src/kernels/xgemv.opencl b/src/kernels/xgemv.opencl index 4bb69090..65061717 100644 --- a/src/kernels/xgemv.opencl +++ b/src/kernels/xgemv.opencl @@ -368,6 +368,6 @@ __kernel void XgemvFastRot(const int m, const int n, const real alpha, const rea // ================================================================================================= // End of the C++11 raw string literal -)"; +)" // ================================================================================================= diff --git a/src/routine.cc b/src/routine.cc index 27bfa8f9..eb5c5034 100644 --- a/src/routine.cc +++ b/src/routine.cc @@ -22,9 +22,10 @@ namespace clblast { std::vector<Routine::ProgramCache> Routine::program_cache_; // Constructor: not much here, because no status codes can be returned -Routine::Routine(CommandQueue &queue, Event &event, +Routine::Routine(CommandQueue &queue, Event &event, const std::string &name, const std::vector<std::string> &routines, const Precision precision): precision_(precision), + routine_name_(name), queue_(queue), event_(event), context_(queue_.GetContext()), @@ -40,7 +41,7 @@ Routine::Routine(CommandQueue &queue, Event &event, // ================================================================================================= // Separate set-up function to allow for status codes to be returned -StatusCode Routine::SetUp(const std::string &routine_source) { +StatusCode Routine::SetUp() { // Queries the cache to see whether or not the compiled kernel is already there. If not, it will // be built and added to the cache. @@ -63,7 +64,8 @@ StatusCode Routine::SetUp(const std::string &routine_source) { // Loads the common header (typedefs and defines and such) std::string common_header = - #include "kernels/common.opencl" + #include "kernels/common.opencl" + ; // Collects the parameters for this device in the form of defines, and adds the precision auto defines = db_.GetDefines(); @@ -76,7 +78,7 @@ StatusCode Routine::SetUp(const std::string &routine_source) { } // Combines everything together into a single source string - auto source_string = defines + common_header + routine_source; + auto source_string = defines + common_header + source_string_; // Compiles the kernel try { diff --git a/src/routines/level1/xaxpy.cc b/src/routines/level1/xaxpy.cc index fba36851..ed680856 100644 --- a/src/routines/level1/xaxpy.cc +++ b/src/routines/level1/xaxpy.cc @@ -30,7 +30,10 @@ template <> const Precision Xaxpy<double2>::precision_ = Precision::kComplexDoub // Constructor: forwards to base class constructor template <typename T> Xaxpy<T>::Xaxpy(CommandQueue &queue, Event &event): - Routine(queue, event, {"Xaxpy"}, precision_) { + Routine(queue, event, "Xaxpy", {"Xaxpy"}, precision_) { + source_string_ = + #include "../../kernels/xaxpy.opencl" + ; } // ================================================================================================= diff --git a/src/routines/level2/xgemv.cc b/src/routines/level2/xgemv.cc index 181337b6..22bbb7ea 100644 --- a/src/routines/level2/xgemv.cc +++ b/src/routines/level2/xgemv.cc @@ -30,7 +30,10 @@ template <> const Precision Xgemv<double2>::precision_ = Precision::kComplexDoub // Constructor: forwards to base class constructor template <typename T> Xgemv<T>::Xgemv(CommandQueue &queue, Event &event): - Routine(queue, event, {"Xgemv"}, precision_) { + Routine(queue, event, "Xgemv", {"Xgemv"}, precision_) { + source_string_ = + #include "../../kernels/xgemv.opencl" + ; } // ================================================================================================= diff --git a/src/routines/level3/xgemm.cc b/src/routines/level3/xgemm.cc index 950a8550..13ffafbb 100644 --- a/src/routines/level3/xgemm.cc +++ b/src/routines/level3/xgemm.cc @@ -30,7 +30,14 @@ template <> const Precision Xgemm<double2>::precision_ = Precision::kComplexDoub // Constructor: forwards to base class constructor template <typename T> Xgemm<T>::Xgemm(CommandQueue &queue, Event &event): - Routine(queue, event, {"Copy", "Pad", "Transpose", "PadTranspose", "Xgemm"}, precision_) { + Routine(queue, event, "Xgemm", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) { + source_string_ = + #include "../../kernels/copy.opencl" + #include "../../kernels/pad.opencl" + #include "../../kernels/transpose.opencl" + #include "../../kernels/padtranspose.opencl" + #include "../../kernels/xgemm.opencl" + ; } // ================================================================================================= diff --git a/src/routines/level3/xher2k.cc b/src/routines/level3/xher2k.cc index 45793ca7..b4291c1e 100644 --- a/src/routines/level3/xher2k.cc +++ b/src/routines/level3/xher2k.cc @@ -28,7 +28,14 @@ template <> const Precision Xher2k<double2,double>::precision_ = Precision::kCom // Constructor: forwards to base class constructor template <typename T, typename U> Xher2k<T,U>::Xher2k(CommandQueue &queue, Event &event): - Routine(queue, event, {"Copy", "Pad", "Transpose", "PadTranspose", "Xgemm"}, precision_) { + Routine(queue, event, "Xher2k", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) { + source_string_ = + #include "../../kernels/copy.opencl" + #include "../../kernels/pad.opencl" + #include "../../kernels/transpose.opencl" + #include "../../kernels/padtranspose.opencl" + #include "../../kernels/xgemm.opencl" + ; } // ================================================================================================= diff --git a/src/routines/level3/xherk.cc b/src/routines/level3/xherk.cc index eaa8861b..4b16d8f7 100644 --- a/src/routines/level3/xherk.cc +++ b/src/routines/level3/xherk.cc @@ -28,7 +28,14 @@ template <> const Precision Xherk<double2,double>::precision_ = Precision::kComp // Constructor: forwards to base class constructor template <typename T, typename U> Xherk<T,U>::Xherk(CommandQueue &queue, Event &event): - Routine(queue, event, {"Copy", "Pad", "Transpose", "PadTranspose", "Xgemm"}, precision_) { + Routine(queue, event, "Xherk", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) { + source_string_ = + #include "../../kernels/copy.opencl" + #include "../../kernels/pad.opencl" + #include "../../kernels/transpose.opencl" + #include "../../kernels/padtranspose.opencl" + #include "../../kernels/xgemm.opencl" + ; } // ================================================================================================= diff --git a/src/routines/level3/xsyr2k.cc b/src/routines/level3/xsyr2k.cc index 66370827..6483629c 100644 --- a/src/routines/level3/xsyr2k.cc +++ b/src/routines/level3/xsyr2k.cc @@ -30,7 +30,14 @@ template <> const Precision Xsyr2k<double2>::precision_ = Precision::kComplexDou // Constructor: forwards to base class constructor template <typename T> Xsyr2k<T>::Xsyr2k(CommandQueue &queue, Event &event): - Routine(queue, event, {"Copy", "Pad", "Transpose", "PadTranspose", "Xgemm"}, precision_) { + Routine(queue, event, "Xsyr2k", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) { + source_string_ = + #include "../../kernels/copy.opencl" + #include "../../kernels/pad.opencl" + #include "../../kernels/transpose.opencl" + #include "../../kernels/padtranspose.opencl" + #include "../../kernels/xgemm.opencl" + ; } // ================================================================================================= diff --git a/src/routines/level3/xsyrk.cc b/src/routines/level3/xsyrk.cc index 0bafe703..5cc1cbec 100644 --- a/src/routines/level3/xsyrk.cc +++ b/src/routines/level3/xsyrk.cc @@ -30,7 +30,14 @@ template <> const Precision Xsyrk<double2>::precision_ = Precision::kComplexDoub // Constructor: forwards to base class constructor template <typename T> Xsyrk<T>::Xsyrk(CommandQueue &queue, Event &event): - Routine(queue, event, {"Copy", "Pad", "Transpose", "PadTranspose", "Xgemm"}, precision_) { + Routine(queue, event, "Xsyrk", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) { + source_string_ = + #include "../../kernels/copy.opencl" + #include "../../kernels/pad.opencl" + #include "../../kernels/transpose.opencl" + #include "../../kernels/padtranspose.opencl" + #include "../../kernels/xgemm.opencl" + ; } // ================================================================================================= diff --git a/src/tuning/copy.cc b/src/tuning/copy.cc index da223bf0..125b076e 100644 --- a/src/tuning/copy.cc +++ b/src/tuning/copy.cc @@ -30,11 +30,10 @@ void CopyTune(const Arguments<T> &args, // This points to the CopyMatrix kernel as found in the CLBlast library. This is just one example // of a copy kernel. However, all copy-kernels use the same tuning parameters, so one has to be // chosen as a representative. - std::string common_source = - #include "../src/kernels/common.opencl" - std::string kernel_source = - #include "../src/kernels/copy.opencl" - auto sources = common_source + kernel_source; + std::string sources = + #include "../src/kernels/common.opencl" + #include "../src/kernels/copy.opencl" + ; auto id = tuner.AddKernelFromString(sources, "CopyMatrix", {args.m, args.n}, {1, 1}); tuner.SetReferenceFromString(sources, "CopyMatrix", {args.m, args.n}, {8, 8}); diff --git a/src/tuning/pad.cc b/src/tuning/pad.cc index b6254cd5..584415c7 100644 --- a/src/tuning/pad.cc +++ b/src/tuning/pad.cc @@ -30,11 +30,10 @@ void PadTune(const Arguments<T> &args, // This points to the PadMatrix kernel as found in the CLBlast library. This is just one // example of a pad kernel. However, all pad-kernels use the same tuning parameters, so one has // to be chosen as a representative. - std::string common_source = - #include "../src/kernels/common.opencl" - std::string kernel_source = - #include "../src/kernels/pad.opencl" - auto sources = common_source + kernel_source; + std::string sources = + #include "../src/kernels/common.opencl" + #include "../src/kernels/pad.opencl" + ; auto id = tuner.AddKernelFromString(sources, "PadMatrix", {args.m, args.n}, {1, 1}); tuner.SetReferenceFromString(sources, "PadMatrix", {args.m, args.n}, {8, 8}); diff --git a/src/tuning/padtranspose.cc b/src/tuning/padtranspose.cc index c84e5950..25044556 100644 --- a/src/tuning/padtranspose.cc +++ b/src/tuning/padtranspose.cc @@ -30,11 +30,10 @@ void PadTransposeTune(const Arguments<T> &args, // This points to the PadTransposeMatrix kernel as found in the CLBlast library. This is just one // example of a transpose kernel. However, all kernels use the same tuning parameters, so one has // to be chosen as a representative. - std::string common_source = - #include "../src/kernels/common.opencl" - std::string kernel_source = - #include "../src/kernels/padtranspose.opencl" - auto sources = common_source + kernel_source; + std::string sources = + #include "../src/kernels/common.opencl" + #include "../src/kernels/padtranspose.opencl" + ; auto id = tuner.AddKernelFromString(sources, "PadTransposeMatrix", {args.m, args.n}, {1, 1}); tuner.SetReferenceFromString(sources, "PadTransposeMatrix", {args.m, args.n}, {8, 8}); diff --git a/src/tuning/transpose.cc b/src/tuning/transpose.cc index 90392866..46756599 100644 --- a/src/tuning/transpose.cc +++ b/src/tuning/transpose.cc @@ -30,11 +30,10 @@ void TransposeTune(const Arguments<T> &args, // This points to the PadTransposeMatrix kernel as found in the CLBlast library. This is just one // example of a transpose kernel. However, all kernels use the same tuning parameters, so one has // to be chosen as a representative. - std::string common_source = - #include "../src/kernels/common.opencl" - std::string kernel_source = - #include "../src/kernels/transpose.opencl" - auto sources = common_source + kernel_source; + std::string sources = + #include "../src/kernels/common.opencl" + #include "../src/kernels/transpose.opencl" + ; auto id = tuner.AddKernelFromString(sources, "TransposeMatrix", {args.m, args.n}, {1, 1}); tuner.SetReferenceFromString(sources, "TransposeMatrix", {args.m, args.n}, {8, 8}); diff --git a/src/tuning/xaxpy.cc b/src/tuning/xaxpy.cc index 0439ed05..20b5978e 100644 --- a/src/tuning/xaxpy.cc +++ b/src/tuning/xaxpy.cc @@ -34,11 +34,10 @@ void XaxpyTune(const Arguments<T> &args, } // This points to the XaxpyFast kernel as found in the CLBlast library - std::string common_source = - #include "../src/kernels/common.opencl" - std::string kernel_source = - #include "../src/kernels/xaxpy.opencl" - auto sources = common_source + kernel_source; + std::string sources = + #include "../src/kernels/common.opencl" + #include "../src/kernels/xaxpy.opencl" + ; auto id = tuner.AddKernelFromString(sources, "XaxpyFast", {args.n}, {1}); tuner.SetReferenceFromString(sources, "XaxpyFast", {args.n}, {64}); diff --git a/src/tuning/xgemm.cc b/src/tuning/xgemm.cc index aba56810..3fe58ed5 100644 --- a/src/tuning/xgemm.cc +++ b/src/tuning/xgemm.cc @@ -30,11 +30,10 @@ void XgemmTune(const Arguments<T> &args, cltune::Tuner &tuner) { // This points to the Xgemm kernel as found in the CLBlast library and its golden reference - std::string common_source = - #include "../src/kernels/common.opencl" - std::string kernel_source = - #include "../src/kernels/xgemm.opencl" - auto sources = common_source + kernel_source; + std::string sources = + #include "../src/kernels/common.opencl" + #include "../src/kernels/xgemm.opencl" + ; auto id = tuner.AddKernelFromString(sources, "Xgemm", {args.m, args.n}, {1, 1}); tuner.SetReferenceFromString(sources, "Xgemm", {args.m, args.n}, {8, 8}); diff --git a/src/tuning/xgemv.cc b/src/tuning/xgemv.cc index 48df6f25..a9d88e4b 100644 --- a/src/tuning/xgemv.cc +++ b/src/tuning/xgemv.cc @@ -36,11 +36,10 @@ void XgemvTune(const Arguments<T> &args, const size_t variation, auto a_rotated = (variation == 3) ? 1 : 0; // This points to the Xgemv kernel as found in the CLBlast library - std::string common_source = - #include "../src/kernels/common.opencl" - std::string kernel_source = - #include "../src/kernels/xgemv.opencl" - auto sources = common_source + kernel_source; + std::string sources = + #include "../src/kernels/common.opencl" + #include "../src/kernels/xgemv.opencl" + ; auto id = tuner.AddKernelFromString(sources, kernel_name, {args.m}, {1}); tuner.SetReferenceFromString(sources, "Xgemv", {args.m}, {64}); |