summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/routine.cpp2
-rw-r--r--src/tuning/tuning.hpp4
-rw-r--r--src/utilities/compile.cpp13
-rw-r--r--src/utilities/compile.hpp4
-rw-r--r--test/correctness/misc/preprocessor.cpp22
5 files changed, 28 insertions, 17 deletions
diff --git a/src/routine.cpp b/src/routine.cpp
index 93882fbf..8c9317d1 100644
--- a/src/routine.cpp
+++ b/src/routine.cpp
@@ -148,7 +148,7 @@ void Routine::InitProgram(std::initializer_list<const char *> source) {
// Completes the source and compiles the kernel
program_ = CompileFromSource(source_string, precision_, routine_name_,
- device_, context_, options);
+ device_, context_, options, false);
// Store the compiled binary and program in the cache
diff --git a/src/tuning/tuning.hpp b/src/tuning/tuning.hpp
index 12826d0c..329314e5 100644
--- a/src/tuning/tuning.hpp
+++ b/src/tuning/tuning.hpp
@@ -227,7 +227,7 @@ void Tuner(int argc, char* argv[]) {
// Compiles the kernel
auto compiler_options = std::vector<std::string>();
const auto program = CompileFromSource(settings.sources, args.precision, settings.kernel_name,
- device, context, compiler_options);
+ device, context, compiler_options, false);
auto kernel = Kernel(program, settings.kernel_name);
C::SetArguments(kernel, args, device_buffers);
printf(" %sOK%s |", kPrintSuccess.c_str(), kPrintEnd.c_str());
@@ -286,7 +286,7 @@ void Tuner(int argc, char* argv[]) {
const auto start_time = std::chrono::steady_clock::now();
auto compiler_options = std::vector<std::string>();
const auto program = CompileFromSource(kernel_source, args.precision, settings.kernel_name,
- device, context, compiler_options, true);
+ device, context, compiler_options, false, true);
auto kernel = Kernel(program, settings.kernel_name);
const auto elapsed_time = std::chrono::steady_clock::now() - start_time;
const auto timing = std::chrono::duration<double,std::milli>(elapsed_time).count();
diff --git a/src/utilities/compile.cpp b/src/utilities/compile.cpp
index 2a55506e..4d1e8929 100644
--- a/src/utilities/compile.cpp
+++ b/src/utilities/compile.cpp
@@ -15,6 +15,7 @@
#include <chrono>
#include "routines/common.hpp"
+#include "kernel_preprocessor.hpp"
namespace clblast {
// =================================================================================================
@@ -23,7 +24,8 @@ namespace clblast {
Program CompileFromSource(const std::string &source_string, const Precision precision,
const std::string &routine_name,
const Device& device, const Context& context,
- std::vector<std::string>& options, const bool silent) {
+ std::vector<std::string>& options,
+ const bool run_preprocessor, const bool silent) {
auto header_string = std::string{""};
header_string += "#define PRECISION " + ToString(static_cast<int>(precision)) + "\n";
@@ -73,8 +75,15 @@ Program CompileFromSource(const std::string &source_string, const Precision prec
const auto start_time = std::chrono::steady_clock::now();
#endif
+ // Runs a pre-processor to unroll loops and perform array-to-register promotion
+ auto kernel_string = header_string + source_string;
+ if (run_preprocessor) {
+ log_debug("Running built-in pre-processor");
+ kernel_string = PreprocessKernelSource(kernel_string);
+ }
+
// Compiles the kernel
- auto program = Program(context, header_string + source_string);
+ auto program = Program(context, kernel_string);
try {
program.Build(device, options);
} catch (const CLCudaAPIBuildError &e) {
diff --git a/src/utilities/compile.hpp b/src/utilities/compile.hpp
index 0315d70c..0df2ded5 100644
--- a/src/utilities/compile.hpp
+++ b/src/utilities/compile.hpp
@@ -27,7 +27,9 @@ namespace clblast {
Program CompileFromSource(const std::string &source_string, const Precision precision,
const std::string &routine_name,
const Device& device, const Context& context,
- std::vector<std::string>& options, const bool silent = false);
+ std::vector<std::string>& options,
+ const bool run_preprocessor,
+ const bool silent = false);
// =================================================================================================
} // namespace clblast
diff --git a/test/correctness/misc/preprocessor.cpp b/test/correctness/misc/preprocessor.cpp
index bcc65700..3f7a2e13 100644
--- a/test/correctness/misc/preprocessor.cpp
+++ b/test/correctness/misc/preprocessor.cpp
@@ -33,27 +33,27 @@ bool TestKernel(const Device& device, const Context& context,
// Verifies that the current kernel compiles properly (assumes so, otherwise throws an error)
auto compiler_options_ref = std::vector<std::string>();
const auto program_ref = CompileFromSource(kernel_source, precision, kernel_name,
- device, context, compiler_options_ref);
+ device, context, compiler_options_ref, false);
- // Runs the pre-processor
- const auto processed_source = PreprocessKernelSource(kernel_source);
-
- // Verifies that the new kernel compiles properly
+ // Compiles the same kernel, but now with the pre-processor enabled
try {
auto compiler_options = std::vector<std::string>();
- const auto program = CompileFromSource(processed_source, precision, kernel_name,
- device, context, compiler_options);
+ const auto program = CompileFromSource(kernel_source, precision, kernel_name,
+ device, context, compiler_options, true);
return true;
- } catch (...) {
- fprintf(stdout, "* ERROR: Compilation warnings/errors with pre-processed kernel\n");
+ } catch (const CLCudaAPIBuildError &e) {
+ fprintf(stdout, "* ERROR: Compilation warnings/errors with pre-processed kernel, status %zu\n",
+ static_cast<size_t>(e.status()));
+ return false;
+ } catch (const Error<std::runtime_error> &e) {
+ fprintf(stdout, "* ERROR: Pre-processor error, message:\n%s\n", e.what());
return false;
}
}
// =================================================================================================
-size_t RunPreprocessor(int argc, char *argv[], const bool silent,
- const Precision precision) {
+size_t RunPreprocessor(int argc, char *argv[], const bool silent, const Precision precision) {
auto errors = size_t{0};
auto passed = size_t{0};