From 13eb772343c46109da0181db3bdc6fa436a9dcce Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Thu, 30 Nov 2017 21:32:47 +0100 Subject: Integrated pre-processor in compilation flow, default is still disabled --- src/routine.cpp | 2 +- src/tuning/tuning.hpp | 4 ++-- src/utilities/compile.cpp | 13 +++++++++++-- src/utilities/compile.hpp | 4 +++- 4 files changed, 17 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/routine.cpp b/src/routine.cpp index 93882fbf..8c9317d1 100644 --- a/src/routine.cpp +++ b/src/routine.cpp @@ -148,7 +148,7 @@ void Routine::InitProgram(std::initializer_list source) { // Completes the source and compiles the kernel program_ = CompileFromSource(source_string, precision_, routine_name_, - device_, context_, options); + device_, context_, options, false); // Store the compiled binary and program in the cache diff --git a/src/tuning/tuning.hpp b/src/tuning/tuning.hpp index 12826d0c..329314e5 100644 --- a/src/tuning/tuning.hpp +++ b/src/tuning/tuning.hpp @@ -227,7 +227,7 @@ void Tuner(int argc, char* argv[]) { // Compiles the kernel auto compiler_options = std::vector(); const auto program = CompileFromSource(settings.sources, args.precision, settings.kernel_name, - device, context, compiler_options); + device, context, compiler_options, false); auto kernel = Kernel(program, settings.kernel_name); C::SetArguments(kernel, args, device_buffers); printf(" %sOK%s |", kPrintSuccess.c_str(), kPrintEnd.c_str()); @@ -286,7 +286,7 @@ void Tuner(int argc, char* argv[]) { const auto start_time = std::chrono::steady_clock::now(); auto compiler_options = std::vector(); const auto program = CompileFromSource(kernel_source, args.precision, settings.kernel_name, - device, context, compiler_options, true); + device, context, compiler_options, false, true); auto kernel = Kernel(program, settings.kernel_name); const auto elapsed_time = std::chrono::steady_clock::now() - start_time; const auto timing = std::chrono::duration(elapsed_time).count(); diff --git a/src/utilities/compile.cpp b/src/utilities/compile.cpp index 2a55506e..4d1e8929 100644 --- a/src/utilities/compile.cpp +++ b/src/utilities/compile.cpp @@ -15,6 +15,7 @@ #include #include "routines/common.hpp" +#include "kernel_preprocessor.hpp" namespace clblast { // ================================================================================================= @@ -23,7 +24,8 @@ namespace clblast { Program CompileFromSource(const std::string &source_string, const Precision precision, const std::string &routine_name, const Device& device, const Context& context, - std::vector& options, const bool silent) { + std::vector& options, + const bool run_preprocessor, const bool silent) { auto header_string = std::string{""}; header_string += "#define PRECISION " + ToString(static_cast(precision)) + "\n"; @@ -73,8 +75,15 @@ Program CompileFromSource(const std::string &source_string, const Precision prec const auto start_time = std::chrono::steady_clock::now(); #endif + // Runs a pre-processor to unroll loops and perform array-to-register promotion + auto kernel_string = header_string + source_string; + if (run_preprocessor) { + log_debug("Running built-in pre-processor"); + kernel_string = PreprocessKernelSource(kernel_string); + } + // Compiles the kernel - auto program = Program(context, header_string + source_string); + auto program = Program(context, kernel_string); try { program.Build(device, options); } catch (const CLCudaAPIBuildError &e) { diff --git a/src/utilities/compile.hpp b/src/utilities/compile.hpp index 0315d70c..0df2ded5 100644 --- a/src/utilities/compile.hpp +++ b/src/utilities/compile.hpp @@ -27,7 +27,9 @@ namespace clblast { Program CompileFromSource(const std::string &source_string, const Precision precision, const std::string &routine_name, const Device& device, const Context& context, - std::vector& options, const bool silent = false); + std::vector& options, + const bool run_preprocessor, + const bool silent = false); // ================================================================================================= } // namespace clblast -- cgit v1.2.3