summary | refs | log | tree | commit | diff
path: root/src/routines
diff options
context:
space:
mode:
author Cedric Nugteren <web@cedricnugteren.nl> 2017-11-17 20:57:46 +0100
committer Cedric Nugteren <web@cedricnugteren.nl> 2017-11-17 20:57:46 +0100
commit f94d498a3773f838bcffb90fd56993a1583ad8ae (patch)
tree c902842196fea011f817e0e408e5d70f7bfeaaa2 /src/routines
parent d9cf206979bf2938b6790300756bab5c9d7987b6 (diff)
Moved compilation function to separate file; removed dependency of tuners of the CLBlast library
Diffstat (limited to 'src/routines')
-rw-r--r-- src/routines/common.cpp 78
-rw-r--r-- src/routines/common.hpp 7
2 files changed, 1 insertions, 84 deletions
diff --git a/src/routines/common.cpp b/src/routines/common.cpp
index c415d9fd..5b178e53 100644
--- a/src/routines/common.cpp
+++ b/src/routines/common.cpp
@@ -19,84 +19,6 @@
namespace clblast {
// =================================================================================================
-// Compiles a program from source code
-Program CompileFromSource(const std::string &source_string, const Precision precision,
- const std::string &routine_name,
- const Device& device, const Context& context,
- std::vector<std::string>& options) {
- auto header_string = std::string{""};
-
- header_string += "#define PRECISION " + ToString(static_cast<int>(precision)) + "\n";
-
- // Adds the name of the routine as a define
- header_string += "#define ROUTINE_" + routine_name + "\n";
-
- // Not all OpenCL compilers support the 'inline' keyword. The keyword is only used for devices on
- // which it is known to work with all OpenCL platforms.
- if (device.IsNVIDIA() || device.IsARM()) {
- header_string += "#define USE_INLINE_KEYWORD 1\n";
- }
-
- // For specific devices, use the non-IEE754 compliant OpenCL mad() instruction. This can improve
- // performance, but might result in a reduced accuracy.
- if (device.IsAMD() && device.IsGPU()) {
- header_string += "#define USE_CL_MAD 1\n";
- }
-
- // For specific devices, use staggered/shuffled workgroup indices.
- if (device.IsAMD() && device.IsGPU()) {
- header_string += "#define USE_STAGGERED_INDICES 1\n";
- }
-
- // For specific devices add a global synchronisation barrier to the GEMM kernel to optimize
- // performance through better cache behaviour
- if (device.IsARM() && device.IsGPU()) {
- header_string += "#define GLOBAL_MEM_FENCE 1\n";
- }
-
- // Optionally adds a translation header from OpenCL kernels to CUDA kernels
- #ifdef CUDA_API
- source_string +=
- #include "kernels/opencl_to_cuda.h"
- ;
- #endif
-
- // Loads the common header (typedefs and defines and such)
- header_string +=
- #include "kernels/common.opencl"
- ;
-
- // Prints details of the routine to compile in case of debugging in verbose mode
- #ifdef VERBOSE
- printf("[DEBUG] Compiling routine '%s-%s'\n",
- routine_name.c_str(), ToString(precision).c_str());
- const auto start_time = std::chrono::steady_clock::now();
- #endif
-
- // Compiles the kernel
- auto program = Program(context, header_string + source_string);
- try {
- program.Build(device, options);
- } catch (const CLCudaAPIBuildError &e) {
- if (program.StatusIsCompilationWarningOrError(e.status())) {
- fprintf(stdout, "OpenCL compiler error/warning: %s\n",
- program.GetBuildInfo(device).c_str());
- }
- throw;
- }
-
- // Prints the elapsed compilation time in case of debugging in verbose mode
- #ifdef VERBOSE
- const auto elapsed_time = std::chrono::steady_clock::now() - start_time;
- const auto timing = std::chrono::duration<double,std::milli>(elapsed_time).count();
- printf("[DEBUG] Completed compilation in %.2lf ms\n", timing);
- #endif
-
- return program;
-}
-
-// =================================================================================================
-
// Enqueues a kernel, waits for completion, and checks for errors
void RunKernel(Kernel &kernel, Queue &queue, const Device &device,
std::vector<size_t> global, const std::vector<size_t> &local,
diff --git a/src/routines/common.hpp b/src/routines/common.hpp
index 8a93d74a..06d001d9 100644
--- a/src/routines/common.hpp
+++ b/src/routines/common.hpp
@@ -20,17 +20,12 @@
#include <vector>
#include "utilities/utilities.hpp"
+#include "utilities/compile.hpp"
#include "database/database.hpp"
namespace clblast {
// =================================================================================================
-// Compiles a program from source code
-Program CompileFromSource(const std::string &source_string, const Precision precision,
- const std::string &routine_name,
- const Device& device, const Context& context,
- std::vector<std::string>& options);
-
// Enqueues a kernel, waits for completion, and checks for errors
void RunKernel(Kernel &kernel, Queue &queue, const Device &device,
std::vector<size_t> global, const std::vector<size_t> &local,