From f94d498a3773f838bcffb90fd56993a1583ad8ae Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Fri, 17 Nov 2017 20:57:46 +0100 Subject: Moved compilation function to separate file; removed dependency of tuners of the CLBlast library --- CMakeLists.txt | 37 ++++++++++++------ src/routines/common.cpp | 78 ------------------------------------- src/routines/common.hpp | 7 +--- src/tuning/tuning.hpp | 1 + src/utilities/compile.cpp | 99 +++++++++++++++++++++++++++++++++++++++++++++++ src/utilities/compile.hpp | 36 +++++++++++++++++ src/utilities/timing.hpp | 1 - 7 files changed, 163 insertions(+), 96 deletions(-) create mode 100644 src/utilities/compile.cpp create mode 100644 src/utilities/compile.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 7ba512eb..f051e441 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -224,6 +224,7 @@ endif() set(SOURCES src/database/database.cpp src/routines/common.cpp + src/utilities/compile.cpp src/utilities/clblast_exceptions.cpp src/utilities/timing.cpp src/utilities/utilities.cpp @@ -244,6 +245,7 @@ set(HEADERS # such that they can be discovered by IDEs such as CLion and Visual src/routines/common.hpp src/routines/routines.hpp src/utilities/buffer_test.hpp + src/utilities/compile.hpp src/utilities/clblast_exceptions.hpp src/utilities/device_mapping.hpp src/utilities/msvc.hpp @@ -366,29 +368,42 @@ endif() # ================================================================================================== # This section contains all the code related to the tuners -# TODO: Remove dependency on CLBlast if(TUNERS) set(TUNERS_COMMON + src/utilities/compile.cpp + src/utilities/clblast_exceptions.cpp + src/utilities/timing.cpp + src/utilities/utilities.cpp src/tuning/configurations.cpp + src/tuning/tuning.cpp) + set(TUNERS_HEADERS # such that they can be discovered by IDEs such as CLion and Visual Studio + src/utilities/compile.hpp + src/utilities/clblast_exceptions.hpp + src/utilities/timing.hpp + src/utilities/utilities.hpp src/tuning/configurations.hpp - src/tuning/tuning.cpp src/tuning/tuning.hpp) - # Visual Studio requires the sources of non-exported objects/libraries - if(MSVC) - set(TUNERS_COMMON ${TUNERS_COMMON} src/utilities/utilities.cpp) - endif() - # Adds tuning executables foreach(KERNEL ${KERNELS}) - add_executable(clblast_tuner_${KERNEL} ${TUNERS_COMMON} src/tuning/kernels/${KERNEL}.cpp) - target_link_libraries(clblast_tuner_${KERNEL} clblast ${API_LIBRARIES}) + add_executable(clblast_tuner_${KERNEL} ${TUNERS_COMMON} ${TUNERS_HEADERS} + src/tuning/kernels/${KERNEL}.cpp) + target_include_directories(clblast_tuner_${KERNEL} PUBLIC + $ + $ + ${API_INCLUDE_DIRS}) + target_link_libraries(clblast_tuner_${KERNEL} ${API_LIBRARIES}) install(TARGETS clblast_tuner_${KERNEL} DESTINATION bin) endforeach() foreach(ROUTINE_TUNER ${ROUTINE_TUNERS}) - add_executable(clblast_tuner_routine_${ROUTINE_TUNER} ${TUNERS_COMMON} src/tuning/routines/${ROUTINE_TUNER}.cpp) - target_link_libraries(clblast_tuner_routine_${ROUTINE_TUNER} clblast ${API_LIBRARIES}) + add_executable(clblast_tuner_routine_${ROUTINE_TUNER} ${TUNERS_COMMON} ${TUNERS_HEADERS} + src/tuning/routines/${ROUTINE_TUNER}.cpp) + target_include_directories(clblast_tuner_routine_${ROUTINE_TUNER} PUBLIC + $ + $ + ${API_INCLUDE_DIRS}) + target_link_libraries(clblast_tuner_routine_${ROUTINE_TUNER} ${API_LIBRARIES}) install(TARGETS clblast_tuner_routine_${ROUTINE_TUNER} DESTINATION bin) endforeach() diff --git a/src/routines/common.cpp b/src/routines/common.cpp index c415d9fd..5b178e53 100644 --- a/src/routines/common.cpp +++ b/src/routines/common.cpp @@ -19,84 +19,6 @@ namespace clblast { // ================================================================================================= -// Compiles a program from source code -Program CompileFromSource(const std::string &source_string, const Precision precision, - const std::string &routine_name, - const Device& device, const Context& context, - std::vector& options) { - auto header_string = std::string{""}; - - header_string += "#define PRECISION " + ToString(static_cast(precision)) + "\n"; - - // Adds the name of the routine as a define - header_string += "#define ROUTINE_" + routine_name + "\n"; - - // Not all OpenCL compilers support the 'inline' keyword. The keyword is only used for devices on - // which it is known to work with all OpenCL platforms. - if (device.IsNVIDIA() || device.IsARM()) { - header_string += "#define USE_INLINE_KEYWORD 1\n"; - } - - // For specific devices, use the non-IEE754 compliant OpenCL mad() instruction. This can improve - // performance, but might result in a reduced accuracy. - if (device.IsAMD() && device.IsGPU()) { - header_string += "#define USE_CL_MAD 1\n"; - } - - // For specific devices, use staggered/shuffled workgroup indices. - if (device.IsAMD() && device.IsGPU()) { - header_string += "#define USE_STAGGERED_INDICES 1\n"; - } - - // For specific devices add a global synchronisation barrier to the GEMM kernel to optimize - // performance through better cache behaviour - if (device.IsARM() && device.IsGPU()) { - header_string += "#define GLOBAL_MEM_FENCE 1\n"; - } - - // Optionally adds a translation header from OpenCL kernels to CUDA kernels - #ifdef CUDA_API - source_string += - #include "kernels/opencl_to_cuda.h" - ; - #endif - - // Loads the common header (typedefs and defines and such) - header_string += - #include "kernels/common.opencl" - ; - - // Prints details of the routine to compile in case of debugging in verbose mode - #ifdef VERBOSE - printf("[DEBUG] Compiling routine '%s-%s'\n", - routine_name.c_str(), ToString(precision).c_str()); - const auto start_time = std::chrono::steady_clock::now(); - #endif - - // Compiles the kernel - auto program = Program(context, header_string + source_string); - try { - program.Build(device, options); - } catch (const CLCudaAPIBuildError &e) { - if (program.StatusIsCompilationWarningOrError(e.status())) { - fprintf(stdout, "OpenCL compiler error/warning: %s\n", - program.GetBuildInfo(device).c_str()); - } - throw; - } - - // Prints the elapsed compilation time in case of debugging in verbose mode - #ifdef VERBOSE - const auto elapsed_time = std::chrono::steady_clock::now() - start_time; - const auto timing = std::chrono::duration(elapsed_time).count(); - printf("[DEBUG] Completed compilation in %.2lf ms\n", timing); - #endif - - return program; -} - -// ================================================================================================= - // Enqueues a kernel, waits for completion, and checks for errors void RunKernel(Kernel &kernel, Queue &queue, const Device &device, std::vector global, const std::vector &local, diff --git a/src/routines/common.hpp b/src/routines/common.hpp index 8a93d74a..06d001d9 100644 --- a/src/routines/common.hpp +++ b/src/routines/common.hpp @@ -20,17 +20,12 @@ #include #include "utilities/utilities.hpp" +#include "utilities/compile.hpp" #include "database/database.hpp" namespace clblast { // ================================================================================================= -// Compiles a program from source code -Program CompileFromSource(const std::string &source_string, const Precision precision, - const std::string &routine_name, - const Device& device, const Context& context, - std::vector& options); - // Enqueues a kernel, waits for completion, and checks for errors void RunKernel(Kernel &kernel, Queue &queue, const Device &device, std::vector global, const std::vector &local, diff --git a/src/tuning/tuning.hpp b/src/tuning/tuning.hpp index 83f08ea9..c8a12b5b 100644 --- a/src/tuning/tuning.hpp +++ b/src/tuning/tuning.hpp @@ -23,6 +23,7 @@ #include #include "utilities/utilities.hpp" +#include "utilities/compile.hpp" #include "utilities/timing.hpp" #include "tuning/configurations.hpp" diff --git a/src/utilities/compile.cpp b/src/utilities/compile.cpp new file mode 100644 index 00000000..3c02d316 --- /dev/null +++ b/src/utilities/compile.cpp @@ -0,0 +1,99 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file implements the kernel compilation functions (see the header for more information). +// +// ================================================================================================= + +#include +#include + +#include "routines/common.hpp" + +namespace clblast { +// ================================================================================================= + +// Compiles a program from source code +Program CompileFromSource(const std::string &source_string, const Precision precision, + const std::string &routine_name, + const Device& device, const Context& context, + std::vector& options) { + auto header_string = std::string{""}; + + header_string += "#define PRECISION " + ToString(static_cast(precision)) + "\n"; + + // Adds the name of the routine as a define + header_string += "#define ROUTINE_" + routine_name + "\n"; + + // Not all OpenCL compilers support the 'inline' keyword. The keyword is only used for devices on + // which it is known to work with all OpenCL platforms. + if (device.IsNVIDIA() || device.IsARM()) { + header_string += "#define USE_INLINE_KEYWORD 1\n"; + } + + // For specific devices, use the non-IEE754 compliant OpenCL mad() instruction. This can improve + // performance, but might result in a reduced accuracy. + if (device.IsAMD() && device.IsGPU()) { + header_string += "#define USE_CL_MAD 1\n"; + } + + // For specific devices, use staggered/shuffled workgroup indices. + if (device.IsAMD() && device.IsGPU()) { + header_string += "#define USE_STAGGERED_INDICES 1\n"; + } + + // For specific devices add a global synchronisation barrier to the GEMM kernel to optimize + // performance through better cache behaviour + if (device.IsARM() && device.IsGPU()) { + header_string += "#define GLOBAL_MEM_FENCE 1\n"; + } + + // Optionally adds a translation header from OpenCL kernels to CUDA kernels + #ifdef CUDA_API + source_string += + #include "kernels/opencl_to_cuda.h" + ; + #endif + + // Loads the common header (typedefs and defines and such) + header_string += + #include "kernels/common.opencl" + ; + + // Prints details of the routine to compile in case of debugging in verbose mode + #ifdef VERBOSE + printf("[DEBUG] Compiling routine '%s-%s'\n", + routine_name.c_str(), ToString(precision).c_str()); + const auto start_time = std::chrono::steady_clock::now(); + #endif + + // Compiles the kernel + auto program = Program(context, header_string + source_string); + try { + program.Build(device, options); + } catch (const CLCudaAPIBuildError &e) { + if (program.StatusIsCompilationWarningOrError(e.status())) { + fprintf(stdout, "OpenCL compiler error/warning: %s\n", + program.GetBuildInfo(device).c_str()); + } + throw; + } + + // Prints the elapsed compilation time in case of debugging in verbose mode + #ifdef VERBOSE + const auto elapsed_time = std::chrono::steady_clock::now() - start_time; + const auto timing = std::chrono::duration(elapsed_time).count(); + printf("[DEBUG] Completed compilation in %.2lf ms\n", timing); + #endif + + return program; +} + +// ================================================================================================= +} // namespace clblast diff --git a/src/utilities/compile.hpp b/src/utilities/compile.hpp new file mode 100644 index 00000000..bd4686eb --- /dev/null +++ b/src/utilities/compile.hpp @@ -0,0 +1,36 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file contains the CLBlast way to compile a kernel from source, used for the library and for +// the auto-tuners. +// +// ================================================================================================= + +#ifndef CLBLAST_UTILITIES_COMPILE_H_ +#define CLBLAST_UTILITIES_COMPILE_H_ + +#include +#include + +#include "utilities/utilities.hpp" + +namespace clblast { +// ================================================================================================= + +// Compiles a program from source code +Program CompileFromSource(const std::string &source_string, const Precision precision, + const std::string &routine_name, + const Device& device, const Context& context, + std::vector& options); + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_UTILITIES_COMPILE_H_ +#endif diff --git a/src/utilities/timing.hpp b/src/utilities/timing.hpp index 3a5e2cff..e8040058 100644 --- a/src/utilities/timing.hpp +++ b/src/utilities/timing.hpp @@ -21,7 +21,6 @@ #include #include "utilities/utilities.hpp" -#include "routines/common.hpp" namespace clblast { // ================================================================================================= -- cgit v1.2.3