From f94d498a3773f838bcffb90fd56993a1583ad8ae Mon Sep 17 00:00:00 2001
From: Cedric Nugteren <web@cedricnugteren.nl>
Date: Fri, 17 Nov 2017 20:57:46 +0100
Subject: Moved compilation function to separate file; removed dependency of
 tuners of the CLBlast library

---
 src/routines/common.cpp | 78 -------------------------------------------------
 src/routines/common.hpp |  7 +----
 2 files changed, 1 insertion(+), 84 deletions(-)

(limited to 'src/routines')
diff --git a/src/routines/common.cpp b/src/routines/common.cpp
index c415d9fd..5b178e53 100644
--- a/src/routines/common.cpp
+++ b/src/routines/common.cpp
@@ -19,84 +19,6 @@
 namespace clblast {
 // =================================================================================================
 
-// Compiles a program from source code
-Program CompileFromSource(const std::string &source_string, const Precision precision,
-                          const std::string &routine_name,
-                          const Device& device, const Context& context,
-                          std::vector<std::string>& options) {
-  auto header_string = std::string{""};
-
-  header_string += "#define PRECISION " + ToString(static_cast<int>(precision)) + "\n";
-
-  // Adds the name of the routine as a define
-  header_string += "#define ROUTINE_" + routine_name + "\n";
-
-  // Not all OpenCL compilers support the 'inline' keyword. The keyword is only used for devices on
-  // which it is known to work with all OpenCL platforms.
-  if (device.IsNVIDIA() || device.IsARM()) {
-    header_string += "#define USE_INLINE_KEYWORD 1\n";
-  }
-
-  // For specific devices, use the non-IEE754 compliant OpenCL mad() instruction. This can improve
-  // performance, but might result in a reduced accuracy.
-  if (device.IsAMD() && device.IsGPU()) {
-    header_string += "#define USE_CL_MAD 1\n";
-  }
-
-  // For specific devices, use staggered/shuffled workgroup indices.
-  if (device.IsAMD() && device.IsGPU()) {
-    header_string += "#define USE_STAGGERED_INDICES 1\n";
-  }
-
-  // For specific devices add a global synchronisation barrier to the GEMM kernel to optimize
-  // performance through better cache behaviour
-  if (device.IsARM() && device.IsGPU()) {
-    header_string += "#define GLOBAL_MEM_FENCE 1\n";
-  }
-
-  // Optionally adds a translation header from OpenCL kernels to CUDA kernels
-  #ifdef CUDA_API
-    header_string +=
-      #include "kernels/opencl_to_cuda.h"
-    ;
-  #endif
-
-  // Loads the common header (typedefs and defines and such)
-  header_string +=
-    #include "kernels/common.opencl"
-  ;
-
-  // Prints details of the routine to compile in case of debugging in verbose mode
-  #ifdef VERBOSE
-    printf("[DEBUG] Compiling routine '%s-%s'\n",
-           routine_name.c_str(), ToString(precision).c_str());
-    const auto start_time = std::chrono::steady_clock::now();
-  #endif
-
-  // Compiles the kernel
-  auto program = Program(context, header_string + source_string);
-  try {
-    program.Build(device, options);
-  } catch (const CLCudaAPIBuildError &e) {
-    if (program.StatusIsCompilationWarningOrError(e.status())) {
-      fprintf(stdout, "OpenCL compiler error/warning: %s\n",
-              program.GetBuildInfo(device).c_str());
-    }
-    throw;
-  }
-
-  // Prints the elapsed compilation time in case of debugging in verbose mode
-  #ifdef VERBOSE
-    const auto elapsed_time = std::chrono::steady_clock::now() - start_time;
-    const auto timing = std::chrono::duration<double,std::milli>(elapsed_time).count();
-    printf("[DEBUG] Completed compilation in %.2lf ms\n", timing);
-  #endif
-
-  return program;
-}
-
-// =================================================================================================
-
 // Enqueues a kernel, waits for completion, and checks for errors
 void RunKernel(Kernel &kernel, Queue &queue, const Device &device,
                std::vector<size_t> global, const std::vector<size_t> &local,
diff --git a/src/routines/common.hpp b/src/routines/common.hpp
index 8a93d74a..06d001d9 100644
--- a/src/routines/common.hpp
+++ b/src/routines/common.hpp
@@ -20,17 +20,12 @@
 #include <vector>
 
 #include "utilities/utilities.hpp"
+#include "utilities/compile.hpp"
 #include "database/database.hpp"
 
 namespace clblast {
 // =================================================================================================
 
-// Compiles a program from source code
-Program CompileFromSource(const std::string &source_string, const Precision precision,
-                          const std::string &routine_name,
-                          const Device& device, const Context& context,
-                          std::vector<std::string>& options);
-
 // Enqueues a kernel, waits for completion, and checks for errors
 void RunKernel(Kernel &kernel, Queue &queue, const Device &device,
                std::vector<size_t> global, const std::vector<size_t> &local,
-- 
cgit v1.2.3