summaryrefslogtreecommitdiff
path: root/CMakeLists.txt
blob: 115285eef0142b333157a3dcb99a717a013d4c29 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
# ==================================================================================================
# This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
# project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
# width of 100 characters per line.
#
# Author(s):
#   Cedric Nugteren <www.cedricnugteren.nl>
#
# ==================================================================================================

cmake_minimum_required(VERSION 2.8.11)

# Optional switch of the MSVC runtime from /MD to /MT. The override files are registered ahead of
# the project() call below, since CMake loads them while enabling the C and C++ languages.
option(OVERRIDE_MSVC_FLAGS_TO_MT "Override compiler flags for MSVC to build with a static runtime (/MT instead of /MD)" ON)
if(OVERRIDE_MSVC_FLAGS_TO_MT)
  set(CLBLAST_OVERRIDES_DIR "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
  set(CMAKE_USER_MAKE_RULES_OVERRIDE "${CLBLAST_OVERRIDES_DIR}/c_flag_overrides.cmake")
  set(CMAKE_USER_MAKE_RULES_OVERRIDE_CXX "${CLBLAST_OVERRIDES_DIR}/cxx_flag_overrides.cmake")
endif()

# Project name, languages, and version: the shared-library SOVERSION follows the major version
project(clblast C CXX)
set(clblast_VERSION_MAJOR 1)
set(clblast_VERSION_MINOR 6)
set(clblast_VERSION_PATCH 1)
set(clblast_VERSION
    "${clblast_VERSION_MAJOR}.${clblast_VERSION_MINOR}.${clblast_VERSION_PATCH}")
set(clblast_SOVERSION "${clblast_VERSION_MAJOR}")

# Policies
# CMP0074 only exists in CMake 3.12 and newer, whereas this file declares CMake 2.8.11 as its
# minimum version. Setting an unknown policy is a hard error, so the call is guarded.
if(POLICY CMP0074)
  cmake_policy(SET CMP0074 NEW)  # to make -DCBLAS_ROOT= work with newer CMake versions as well
endif()

# User-configurable switches with their defaults: library type and optional build products
option(BUILD_SHARED_LIBS "Build a shared (ON) or static library (OFF)" ON)
option(SAMPLES "Enable compilation of the examples" OFF)
option(TUNERS "Enable compilation of the tuners" ON)
option(CLIENTS "Enable compilation of the clients to test and compare performance" OFF)
option(TESTS "Enable compilation of the correctness tests" OFF)
option(CUBLAS "Enables performance comparison against cuBLAS on NVIDIA GPUs" OFF)

# The optional Netlib (CBLAS-style) API. The persistent-OpenCL define is only added when the Netlib
# API itself is enabled.
option(NETLIB "Enable compilation of the CBLAS Netlib API" OFF)
option(NETLIB_PERSISTENT_OPENCL "Makes OpenCL device and context in the CBLAS Netlib API static" OFF)
if(NETLIB)
  message("-- Building the Netlib API of CLBlast")
endif()
if(NETLIB AND NETLIB_PERSISTENT_OPENCL)
  message("   ^^ while using static variables for OpenCL device and context")
  add_definitions(-DNETLIB_PERSISTENT_OPENCL)
endif()

# Opt-in workarounds for known bugs, exposed to the sources as preprocessor definitions
option(AMD_SI_EMPTY_KERNEL_WORKAROUND "Enables workaround for bug in AMD Southern Island GPUs" OFF)
if(AMD_SI_EMPTY_KERNEL_WORKAROUND)
  add_definitions("-DAMD_SI_EMPTY_KERNEL_WORKAROUND")
endif()

# Back-end selection: exactly one of the OpenCL API (default) or the CUDA API (beta) must be
# enabled. The chosen API is passed to the sources as either OPENCL_API or CUDA_API.
option(OPENCL "Build CLBlast with an OpenCL API (default)" ON)
option(CUDA "Build CLBlast with a CUDA API (beta)" OFF)
if(OPENCL AND CUDA)
  message(FATAL_ERROR "Multiple APIs selected, choose either OpenCL (-DOPENCL=ON -DCUDA=OFF) or CUDA (-DCUDA=ON -DOPENCL=OFF)")
elseif(OPENCL)
  message("-- Building CLBlast with OpenCL API (default)")
  add_definitions("-DOPENCL_API")
elseif(CUDA)
  message("-- Building CLBlast with CUDA API (beta)")
  add_definitions("-DCUDA_API")
else()
  message(FATAL_ERROR "No API selected, choose from OpenCL (-DOPENCL=ON) or CUDA (-DCUDA=ON)")
endif()

# Compile in verbose mode with additional diagnostic messages. When enabled, the VERBOSE
# preprocessor symbol is defined for all targets; the same option also selects '-O1 -g' instead of
# '-O2' in the C++ compiler settings further down in this file.
option(VERBOSE "Compile in verbose mode for additional diagnostic messages" OFF)
if(VERBOSE)
  message("-- Building in verbose mode")
  add_definitions(-DVERBOSE)
endif()

# ==================================================================================================

# RPATH settings: turns on CMake's CMAKE_MACOSX_RPATH default for all targets created below
set(CMAKE_MACOSX_RPATH 1)

# ==================================================================================================

# Compiler-version check (requires at least CMake 2.8.10)
# Aborts the configuration with a fatal error when the detected C++ compiler is older than the
# minimum version listed for it. Compilers not listed here are accepted without a check.
# The branches are tried in order and only the first match is evaluated: the generic MSVC check
# comes last, so it only applies when none of the compiler IDs above matched.
# NOTE(review): this file does not set policy CMP0025, so Apple's Clang is presumably reported as
# 'Clang' rather than 'AppleClang' and would then be compared against 3.3 - confirm.
if(CMAKE_CXX_COMPILER_ID STREQUAL GNU)
  if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.7)
    message(FATAL_ERROR "GCC version must be at least 4.7")
  endif()
elseif(CMAKE_CXX_COMPILER_ID STREQUAL Clang)
  if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 3.3)
    message(FATAL_ERROR "Clang version must be at least 3.3")
  endif()
elseif(CMAKE_CXX_COMPILER_ID STREQUAL AppleClang)
  if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.0)
    message(FATAL_ERROR "AppleClang version must be at least 5.0")
  endif()
elseif(CMAKE_CXX_COMPILER_ID STREQUAL Intel)
  if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 14.0)
    message(FATAL_ERROR "ICC version must be at least 14.0")
  endif()
elseif(MSVC)
  # The compared value is the compiler (cl) version, not the Visual Studio product version
  if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 18.0)
    message(FATAL_ERROR "MS Visual Studio version must be at least 18.0")
  endif()
endif()

# DLL settings: shared-library builds with Visual Studio define CLBLAST_DLL for every target
if(MSVC AND BUILD_SHARED_LIBS)
  add_definitions(" /DCLBLAST_DLL")
endif()

# C++ compiler settings: the flags are collected in FLAGS and appended to CMAKE_CXX_FLAGS
if(MSVC)
  set(FLAGS "/Ot /wd4715 /D_CRT_SECURE_NO_WARNINGS")
else()

  # C++11 for all other compilers; verbose builds use a lower optimisation level plus debug info
  if(VERBOSE)
    set(FLAGS "-std=c++11 -O1 -g")
  else()
    set(FLAGS "-std=c++11 -O2")
  endif()

  # Compiler-specific warning configuration
  if(CMAKE_CXX_COMPILER_ID STREQUAL GNU)
    foreach(WARNING_FLAG -Wall -Wno-comment -Wno-return-type -Wno-switch -Wno-missing-noreturn
                         -Wno-unused-function)
      set(FLAGS "${FLAGS} ${WARNING_FLAG}")
    endforeach()
    if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.9.0)
      set(FLAGS "${FLAGS} -Wno-attributes -Wno-unused-variable")
    endif()
    if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 6.0.0)
      # GCC does not support attributes on template arguments. This is hit in particular with the
      # alignment attributes on the cl_XXX types, which are used to instantiate various templates
      # in CLBlast.
      set(FLAGS "${FLAGS} -Wno-ignored-attributes")
    endif()
  elseif(CMAKE_CXX_COMPILER_ID MATCHES Clang)
    foreach(WARNING_FLAG -Wextra -Wno-c++98-compat -Wno-c++98-compat-pedantic -Wno-padded
                         -Wno-missing-prototypes -Wno-float-equal -Wno-switch-enum -Wno-switch
                         -Wno-exit-time-destructors -Wno-global-constructors -Wno-missing-noreturn
                         -Wno-deprecated-declarations -Wno-unused-function)
      set(FLAGS "${FLAGS} ${WARNING_FLAG}")
    endforeach()
    # Only for clang 4.0 or higher, but not for AppleClang (hence the upper bound)
    if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 3.9.0 AND
       CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7.0.0)
      set(FLAGS "${FLAGS} -Wno-undefined-var-template")
    endif()
  endif()

endif()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAGS}")

# C compiler settings (for the sample): C99 with -O2 by default, only /Ot for Visual Studio
set(CFLAGS "-O2 -std=c99")
if(MSVC)
  set(CFLAGS "/Ot")
endif()
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${CFLAGS}")

# ==================================================================================================

# Makes the find-modules shipped with CLBlast visible to find_package()
list(APPEND CMAKE_MODULE_PATH "${clblast_SOURCE_DIR}/cmake/Modules/")

# Locates the selected back-end and stores its libraries and include directories in the
# API_LIBRARIES and API_INCLUDE_DIRS variables used throughout the remainder of this file
if(OPENCL)
  # Requires OpenCL. It is found through the included "FindOpenCL.cmake" in CMAKE_MODULE_PATH.
  find_package(OpenCL REQUIRED)
  set(API_INCLUDE_DIRS ${OPENCL_INCLUDE_DIRS})
  set(API_LIBRARIES ${OPENCL_LIBRARIES})
elseif(CUDA)
  # For CUDA, the "FindCUDA.cmake" is part of CMake
  find_package(CUDA REQUIRED)
  set(API_INCLUDE_DIRS ${CUDA_INCLUDE_DIRS})
  set(API_LIBRARIES cuda nvrtc)
  link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib64")
endif()

# Don't search for system libraries when cross-compiling.
# The variable name is handed to if() as-is: expanding it first (${CMAKE_SYSTEM_NAME}) would let
# if() interpret the expanded text as yet another variable name, and would produce a malformed
# condition if the value were ever empty.
if(CMAKE_SYSTEM_NAME STREQUAL "Android")
  if(TESTS)
    message(STATUS "Compilation of the tests disabled for the Android target")
    set(TESTS OFF)
  endif()
  if(CLIENTS)
    message(STATUS "Head-to-head performance comparison not supported in the clients for the Android target")
  endif()
else()

  # Locates the reference BLAS libraries in case the tests need to be compiled. The "FindclBLAS.cmake",
  # "FindCBLAS.cmake", "FindMKL.cmake", and "FindcuBLAS.cmake" are included.
  if(CLIENTS OR TESTS)
    find_package(CBLAS)
    find_package(MKL)
    if(OPENCL)
      find_package(clBLAS)
    endif()
    if(CUBLAS)
      find_package(cuBLAS)
    endif()

    # Without clBLAS and without a CPU BLAS (CBLAS or MKL) the correctness tests are switched off;
    # the clients are still built, but a notice about the missing comparison is printed
    if(NOT CLBLAS_FOUND AND NOT CBLAS_FOUND AND NOT MKL_FOUND)
      if(TESTS)
        message(STATUS "Could NOT find clBLAS nor a CPU BLAS, disabling the compilation of the tests")
        set(TESTS OFF)
      endif()
      if(CLIENTS)
        message(STATUS "Could NOT find clBLAS nor a CPU BLAS, head-to-head performance comparison not supported in the clients")
      endif()
    endif()
  endif()
endif()

# ==================================================================================================

# Sets the supported routines and the used kernels. New routines and kernels should be added here.

# Tunable kernels (one tuner executable each) and the per-kernel tuning-parameter databases
set(KERNELS copy_fast copy_pad transpose_fast transpose_pad xaxpy xdot xger
            xgemm xgemm_direct xgemv invert xconvgemm)
set(DATABASES copy pad padtranspose transpose xaxpy xdot
              xgemm xgemm_direct xgemv xgemv_fast xgemv_fast_rot xger invert
              gemm_routine trsv_routine xconvgemm)

# Routine-level tuners (only built for the OpenCL API)
set(ROUTINE_TUNERS xgemm xtrsv)

# Routines per level: the names map onto the file names below src/routines/level<N>/
set(LEVEL1_ROUTINES xswap xscal xcopy xaxpy xdot xdotu xdotc xnrm2 xasum xamax)
set(LEVEL2_ROUTINES xgemv xgbmv xhemv xhbmv xhpmv xsymv xsbmv xspmv xtrmv xtbmv xtpmv xtrsv
                    xger xgeru xgerc xher xhpr xher2 xhpr2 xsyr xspr xsyr2 xspr2)
set(LEVEL3_ROUTINES xgemm xsymm xhemm xsyrk xherk xsyr2k xher2k xtrmm xtrsm)
set(LEVELX_ROUTINES xhad xomatcopy xim2col xcol2im xconvgemm xaxpybatched xgemmbatched
                    xgemmstridedbatched)

# All routines together, in level order
set(ROUTINES ${LEVEL1_ROUTINES})
list(APPEND ROUTINES ${LEVEL2_ROUTINES} ${LEVEL3_ROUTINES} ${LEVELX_ROUTINES})

# Values passed as '-precision' to the tuners by the 'alltuners' target
set(PRECISIONS 32 64 3232 6464 16)

# Sample programs: the available set depends on the selected API
if(OPENCL)
  set(SAMPLE_PROGRAMS_CPP sgemm sgemm_batched dtrsm tuning_api)
  set(SAMPLE_PROGRAMS_C sasum samax dgemv sgemm haxpy cache)
  if(NETLIB)
    list(APPEND SAMPLE_PROGRAMS_C sgemm_netlib)
  endif()
elseif(CUDA)
  set(SAMPLE_PROGRAMS_CPP daxpy_cuda sgemm_cuda)
  set(SAMPLE_PROGRAMS_C )
endif()

# ==================================================================================================

# Gathers all source-files (required for the compiler) and header-files (for IDEs only)

# Sources which are part of the library regardless of the selected API
set(SOURCES
  src/database/database.cpp
  src/routines/common.cpp
  src/utilities/compile.cpp
  src/utilities/clblast_exceptions.cpp
  src/utilities/timing.cpp
  src/utilities/utilities.cpp
  src/api_common.cpp
  src/cache.cpp
  src/kernel_preprocessor.cpp
  src/routine.cpp
  src/routines/levelx/xinvert.cpp  # only source, don't include it as a test
  src/tuning/configurations.cpp
)

# Headers are listed such that they can be discovered by IDEs such as CLion and Visual Studio
set(HEADERS
  include/clblast_half.h
  src/database/apple_cpu_fallback.hpp
  src/database/database.hpp
  src/database/database_structure.hpp
  src/routines/level1/xamin.hpp
  src/routines/level1/xmax.hpp
  src/routines/level1/xmin.hpp
  src/routines/level1/xsum.hpp
  src/routines/common.hpp
  src/routines/routines.hpp
  src/utilities/buffer_test.hpp
  src/utilities/compile.hpp
  src/utilities/clblast_exceptions.hpp
  src/utilities/device_mapping.hpp
  src/utilities/msvc.hpp
  src/utilities/timing.hpp
  src/utilities/utilities.hpp
  src/cache.hpp
  src/kernel_preprocessor.hpp
  src/cxpp11_common.hpp
  src/routine.hpp
  src/tuning/configurations.hpp
  src/tuning/tuning.hpp
  src/tuning/routines/routine_tuner.hpp
)

# API-specific entry points and their public headers
if(OPENCL)
  list(APPEND SOURCES src/clblast.cpp src/clblast_c.cpp src/tuning/tuning_api.cpp)
  list(APPEND HEADERS include/clblast.h include/clblast_c.h src/clpp11.hpp)
  if(NETLIB)
    list(APPEND SOURCES src/clblast_netlib_c.cpp)
    list(APPEND HEADERS include/clblast_netlib_c.h)
  endif()
elseif(CUDA)
  list(APPEND SOURCES src/clblast_cuda.cpp)
  list(APPEND HEADERS include/clblast_cuda.h src/cupp11.hpp)
endif()

# One source/header pair per routine, stored in src/routines/level{1,2,3,x}/
foreach(LEVEL 1 2 3 X)
  string(TOLOWER "src/routines/level${LEVEL}" ROUTINE_DIR)
  foreach(ROUTINE ${LEVEL${LEVEL}_ROUTINES})
    list(APPEND SOURCES ${ROUTINE_DIR}/${ROUTINE}.cpp)
    list(APPEND HEADERS ${ROUTINE_DIR}/${ROUTINE}.hpp)
  endforeach()
endforeach()

# One source per database, plus a general header and one header per precision
foreach(DATABASE ${DATABASES})
  set(DATABASE_BASENAME src/database/kernels/${DATABASE}/${DATABASE})
  list(APPEND SOURCES ${DATABASE_BASENAME}.cpp)
  list(APPEND HEADERS ${DATABASE_BASENAME}.hpp)
  foreach(PRECISION_SUFFIX 16 32 64 3232 6464)
    list(APPEND HEADERS ${DATABASE_BASENAME}_${PRECISION_SUFFIX}.hpp)
  endforeach()
endforeach()

# The tuner definition of each kernel
foreach(KERNEL ${KERNELS})
  list(APPEND HEADERS src/tuning/kernels/${KERNEL}.hpp)
endforeach()

# Creates the library target, either shared or static depending on BUILD_SHARED_LIBS
if(BUILD_SHARED_LIBS)
  set(CLBLAST_LIBRARY_TYPE SHARED)
else()
  set(CLBLAST_LIBRARY_TYPE STATIC)
endif()
add_library(clblast ${CLBLAST_LIBRARY_TYPE} ${SOURCES} ${HEADERS})
set_target_properties(clblast PROPERTIES
                      VERSION ${clblast_VERSION}
                      SOVERSION ${clblast_SOVERSION})

# Links against the libraries of the selected API (OpenCL or CUDA)
target_link_libraries(clblast ${API_LIBRARIES})

# Include directories: CLBlast's own (build-tree and installed variants) plus those of the API
target_include_directories(clblast PUBLIC
                           $<BUILD_INTERFACE:${clblast_SOURCE_DIR}/include>
                           $<BUILD_INTERFACE:${clblast_SOURCE_DIR}/src>
                           $<INSTALL_INTERFACE:include>
                           ${API_INCLUDE_DIRS})

# Sets the proper __declspec(dllexport) keyword for Visual Studio when the library is built.
# Note that target_compile_definitions requires at least CMake 2.8.11.
if(MSVC AND BUILD_SHARED_LIBS)
  target_compile_definitions(clblast PRIVATE COMPILING_DLL=1)
endif()

# Installation rules, using the standard GNU directory layout variables
include(GNUInstallDirs)

# The library itself, registered in the 'CLBlast' export set
install(TARGETS clblast EXPORT CLBlast DESTINATION ${CMAKE_INSTALL_LIBDIR})

# The public headers: the half-precision header always, the others depending on the selected API
set(CLBLAST_INSTALL_HEADERS include/clblast_half.h)
if(OPENCL)
  list(APPEND CLBLAST_INSTALL_HEADERS include/clblast.h include/clblast_c.h)
  if(NETLIB)
    list(APPEND CLBLAST_INSTALL_HEADERS include/clblast_netlib_c.h)
  endif()
elseif(CUDA)
  list(APPEND CLBLAST_INSTALL_HEADERS include/clblast_cuda.h)
endif()
install(FILES ${CLBLAST_INSTALL_HEADERS} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})

# Installs the config for find_package in dependent projects
install(EXPORT CLBlast
        DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/CLBlast
        FILE CLBlastConfig.cmake)

# Generates and installs the pkg-config file on UNIX-like systems
if(UNIX)
  configure_file("${CMAKE_CURRENT_SOURCE_DIR}/clblast.pc.in"
                 "${CMAKE_CURRENT_BINARY_DIR}/clblast.pc" @ONLY IMMEDIATE)
  install(FILES ${CMAKE_CURRENT_BINARY_DIR}/clblast.pc
          DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)
endif()

# ==================================================================================================

# This section contains all the code related to the examples
if(SAMPLES)

  # Downloads the opencl.hpp file from Khronos into the samples directory. The download result is
  # inspected explicitly: file(DOWNLOAD) does not stop the configuration on failure, so without
  # this check a missing network connection would go unnoticed until compilation.
  if(OPENCL)
    file(DOWNLOAD https://raw.githubusercontent.com/KhronosGroup/OpenCL-CLHPP/main/include/CL/opencl.hpp
         ${clblast_SOURCE_DIR}/samples/opencl.hpp
         STATUS OPENCL_HPP_DOWNLOAD_STATUS)
    # The status is a two-element list: a numeric code (0 means success) and a message
    list(GET OPENCL_HPP_DOWNLOAD_STATUS 0 OPENCL_HPP_DOWNLOAD_CODE)
    if(NOT OPENCL_HPP_DOWNLOAD_CODE EQUAL 0)
      list(GET OPENCL_HPP_DOWNLOAD_STATUS 1 OPENCL_HPP_DOWNLOAD_MESSAGE)
      message(WARNING "Could not download opencl.hpp from Khronos (${OPENCL_HPP_DOWNLOAD_MESSAGE}), samples relying on it might fail to compile")
    endif()
  endif()

  # Adds sample programs (C++): one executable per entry, linked against CLBlast and the API
  foreach(SAMPLE ${SAMPLE_PROGRAMS_CPP})
    add_executable(clblast_sample_${SAMPLE} samples/${SAMPLE}.cpp)
    target_link_libraries(clblast_sample_${SAMPLE} clblast ${API_LIBRARIES})
    install(TARGETS clblast_sample_${SAMPLE} DESTINATION bin)
  endforeach()

  # Adds sample programs (C): as above, with a '_c' suffix on the target name
  foreach(SAMPLE ${SAMPLE_PROGRAMS_C})
    add_executable(clblast_sample_${SAMPLE}_c samples/${SAMPLE}.c)
    target_link_libraries(clblast_sample_${SAMPLE}_c clblast ${API_LIBRARIES})
    install(TARGETS clblast_sample_${SAMPLE}_c DESTINATION bin)
  endforeach()

endif()

# ==================================================================================================

# This section contains all the code related to the tuners
if(TUNERS)

  # Sources compiled into every tuner: the tuners do not link against the CLBlast library (except
  # for the routine tuners below), so the required parts of CLBlast are compiled in directly
  set(TUNERS_COMMON
      src/utilities/compile.cpp
      src/utilities/clblast_exceptions.cpp
      src/utilities/timing.cpp
      src/utilities/utilities.cpp
      src/tuning/configurations.cpp
      src/tuning/tuning.cpp
      src/kernel_preprocessor.cpp)
  set(TUNERS_HEADERS  # such that they can be discovered by IDEs such as CLion and Visual Studio
      src/utilities/compile.hpp
      src/utilities/clblast_exceptions.hpp
      src/utilities/timing.hpp
      src/utilities/utilities.hpp
      src/tuning/configurations.hpp
      src/tuning/tuning.hpp
      src/tuning/routines/routine_tuner.hpp
      src/kernel_preprocessor.hpp)
  set(TUNERS_COMMON ${TUNERS_COMMON} ${TUNERS_HEADERS})

  # Creates a library with common sources for all tuners
  if(MSVC)
    # Visual Studio requires the sources of non-exported objects/libraries: TUNERS_COMMON is left
    # as a plain list of source files, which are then compiled once per tuner
  else()
    # Creates the common performance-tests objects (requires CMake 2.8.8)
    add_library(tuners_common_library OBJECT ${TUNERS_COMMON})

    # Adds CLBlast's interface include paths because we can't link to CLBlast here
    target_include_directories(tuners_common_library PRIVATE
                               $<TARGET_PROPERTY:clblast,INTERFACE_INCLUDE_DIRECTORIES>
                               ${clblast_SOURCE_DIR} ${API_INCLUDE_DIRS})

    # From here on TUNERS_COMMON refers to the pre-compiled objects instead of the sources
    set(TUNERS_COMMON $<TARGET_OBJECTS:tuners_common_library>)
  endif()

  # Adds tuning executables: one per kernel
  set(ALLKERNELS ${KERNELS})
  foreach(KERNEL ${ALLKERNELS})
    add_executable(clblast_tuner_${KERNEL} ${TUNERS_COMMON} src/tuning/kernels/${KERNEL}.cpp)
    target_link_libraries(clblast_tuner_${KERNEL} ${API_LIBRARIES})
    target_include_directories(clblast_tuner_${KERNEL} PUBLIC $<TARGET_PROPERTY:clblast,INTERFACE_INCLUDE_DIRECTORIES> ${API_INCLUDE_DIRS})
    install(TARGETS clblast_tuner_${KERNEL} DESTINATION bin)
  endforeach()

  # Routine-level tuners: OpenCL only, and (unlike the kernel tuners) linked against CLBlast
  if(OPENCL)
    foreach(ROUTINE_TUNER ${ROUTINE_TUNERS})
      add_executable(clblast_tuner_routine_${ROUTINE_TUNER} ${TUNERS_COMMON} src/tuning/routines/${ROUTINE_TUNER}.cpp test/test_utilities.cpp)
      target_link_libraries(clblast_tuner_routine_${ROUTINE_TUNER} clblast)
      target_include_directories(clblast_tuner_routine_${ROUTINE_TUNER} PUBLIC $<TARGET_PROPERTY:clblast,INTERFACE_INCLUDE_DIRECTORIES> ${API_INCLUDE_DIRS} ${clblast_SOURCE_DIR})
      install(TARGETS clblast_tuner_routine_${ROUTINE_TUNER} DESTINATION bin)
    endforeach()
  endif()

  # Adds 'alltuners' target: runs all tuners for all precisions.
  # ALLTUNERS accumulates one 'COMMAND <tuner> -precision <p>' entry per tuner/precision pair and
  # ALLTUNERSDEPENDS accumulates every tuner target. The dependency list has to be appended to: a
  # plain assignment would only keep the tuner of the last loop iteration.
  set(ALLTUNERS )
  set(ALLTUNERSDEPENDS )
  foreach(KERNEL ${KERNELS})
    foreach(PRECISION ${PRECISIONS})
      set(ALLTUNERS ${ALLTUNERS} COMMAND clblast_tuner_${KERNEL} -precision ${PRECISION})
    endforeach()
    set(ALLTUNERSDEPENDS ${ALLTUNERSDEPENDS} clblast_tuner_${KERNEL})
  endforeach()
  if(OPENCL)
    foreach(ROUTINE_TUNER ${ROUTINE_TUNERS})
      foreach(PRECISION ${PRECISIONS})
        set(ALLTUNERS ${ALLTUNERS} COMMAND clblast_tuner_routine_${ROUTINE_TUNER} -precision ${PRECISION})
      endforeach()
      set(ALLTUNERSDEPENDS ${ALLTUNERSDEPENDS} clblast_tuner_routine_${ROUTINE_TUNER})
    endforeach()
  endif()
  add_custom_target(alltuners ${ALLTUNERS} DEPENDS ${ALLTUNERSDEPENDS})

endif()

# ==================================================================================================

# Section for the tests: common part for both performance ('CLIENTS') and correctness ('TESTS')
if(CLIENTS OR TESTS)

  # Prefix used to pass a preprocessor definition to the compiler (Visual Studio vs. the others)
  if(MSVC)
    set(REF_DEFINE_PREFIX " /D")
  else()
    set(REF_DEFINE_PREFIX " -D")
  endif()

  # Sets the specifics for the reference BLAS libraries: include directories, libraries, wrapper
  # headers, and a CLBLAST_REF_<name> definition for each reference library that was found
  set(REF_INCLUDES )
  set(REF_LIBRARIES )

  # clBLAS (additionally links the threading library found by find_package(Threads))
  if(CLBLAS_FOUND)
    find_package(Threads)
    list(APPEND REF_LIBRARIES ${CLBLAS_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})
    list(APPEND REF_INCLUDES ${CLBLAS_INCLUDE_DIRS})
    list(APPEND WRAPPERS test/wrapper_clblas.hpp)
    add_definitions("${REF_DEFINE_PREFIX}CLBLAST_REF_CLBLAS")
  endif()

  # CPU BLAS: prefers MKL over another CBLAS implementation
  if(MKL_FOUND)
    list(APPEND REF_INCLUDES ${MKL_INCLUDE_DIRS})
    list(APPEND REF_LIBRARIES ${MKL_LIBRARIES})
    add_definitions("${REF_DEFINE_PREFIX}CLBLAST_REF_CBLAS_MKL")
  elseif(CBLAS_FOUND)
    list(APPEND REF_INCLUDES ${CBLAS_INCLUDE_DIRS})
    list(APPEND REF_LIBRARIES ${CBLAS_LIBRARIES})
  endif()
  if(MKL_FOUND OR CBLAS_FOUND)
    list(APPEND WRAPPERS test/wrapper_cblas.hpp)
    add_definitions("${REF_DEFINE_PREFIX}CLBLAST_REF_CBLAS")
  endif()

  # cuBLAS
  if(CUBLAS_FOUND)
    list(APPEND REF_INCLUDES ${CUDA_INCLUDE_DIRS})
    list(APPEND REF_LIBRARIES ${CUDA_LIBRARIES} ${CUBLAS_LIBRARIES})
    list(APPEND WRAPPERS test/wrapper_cuda.hpp test/wrapper_cublas.hpp)
    add_definitions("${REF_DEFINE_PREFIX}CLBLAST_REF_CUBLAS")
  endif()

endif()

# ==================================================================================================

# Section for the performance tests (i.e. the client). These compare against optionally a reference
# library, either clBLAS, a CPU BLAS, or CUDA's cuBLAS.
if(CLIENTS)

  # Headers shared by all clients (listed for the benefit of IDEs)
  set(CLIENTS_COMMON ${WRAPPERS} test/test_utilities.hpp
                     test/performance/client.hpp test/routines/common.hpp)

  if(NOT MSVC)
    # Creates the common performance-tests objects (requires CMake 2.8.8)
    add_library(test_performance_common OBJECT test/test_utilities.cpp test/performance/client.cpp)

    # Adds CLBlast's interface include paths because we can't link to CLBlast here
    target_include_directories(test_performance_common PRIVATE
                               $<TARGET_PROPERTY:clblast,INTERFACE_INCLUDE_DIRECTORIES>
                               ${clblast_SOURCE_DIR} ${REF_INCLUDES})
    list(APPEND CLIENTS_COMMON $<TARGET_OBJECTS:test_performance_common>)
  else()
    # Visual Studio requires the sources of non-exported objects/libraries
    list(APPEND CLIENTS_COMMON src/utilities/utilities.cpp test/test_utilities.cpp
                               test/performance/client.cpp)
  endif()

  # Compiles, links, and installs the performance-tests: one client per routine, with the sources
  # taken from the level1, level2, level3, and levelx sub-directories
  foreach(LEVEL 1 2 3 X)
    string(TOLOWER "level${LEVEL}" LEVEL_DIR)
    foreach(ROUTINE ${LEVEL${LEVEL}_ROUTINES})
      add_executable(clblast_client_${ROUTINE} ${CLIENTS_COMMON}
                     test/performance/routines/${LEVEL_DIR}/${ROUTINE}.cpp
                     test/routines/${LEVEL_DIR}/${ROUTINE}.hpp)
      target_link_libraries(clblast_client_${ROUTINE} clblast ${REF_LIBRARIES} ${API_LIBRARIES})
      target_include_directories(clblast_client_${ROUTINE} PUBLIC
                                 ${clblast_SOURCE_DIR} ${REF_INCLUDES})
      install(TARGETS clblast_client_${ROUTINE} DESTINATION bin)
    endforeach()
  endforeach()

endif()

# ==================================================================================================

# Section for the correctness tests. Note that these tests require the presence of clBLAS and/or a
# CPU BLAS library, and/or cuBLAS to act as a reference.
if(TESTS)
  enable_testing()

  # Headers shared by all tests (listed for the benefit of IDEs)
  set(TESTS_COMMON ${WRAPPERS} test/test_utilities.hpp test/correctness/testblas.hpp
                   test/correctness/tester.hpp test/routines/common.hpp)

  # Visual Studio requires the sources of non-exported objects/libraries
  if(MSVC)
    set(TESTS_COMMON ${TESTS_COMMON} src/utilities/utilities.cpp test/test_utilities.cpp
        test/correctness/tester.cpp test/correctness/testblas.cpp)
  else()
    # Creates the common correctness-tests objects (requires CMake 2.8.8)
    add_library(test_correctness_common OBJECT
                test/test_utilities.cpp test/correctness/tester.cpp test/correctness/testblas.cpp)
    target_include_directories(test_correctness_common PUBLIC
                               $<TARGET_PROPERTY:clblast,INTERFACE_INCLUDE_DIRECTORIES>
                               ${clblast_SOURCE_DIR} ${REF_INCLUDES})
    set(TESTS_COMMON ${TESTS_COMMON} $<TARGET_OBJECTS:test_correctness_common>)
  endif()

  # Compiles the correctness-tests: one executable per routine, grouped per level
  foreach(ROUTINE ${LEVEL1_ROUTINES})
    add_executable(clblast_test_${ROUTINE} ${TESTS_COMMON}
                   test/correctness/routines/level1/${ROUTINE}.cpp
                   test/routines/level1/${ROUTINE}.hpp)
  endforeach()
  foreach(ROUTINE ${LEVEL2_ROUTINES})
    add_executable(clblast_test_${ROUTINE} ${TESTS_COMMON}
                   test/correctness/routines/level2/${ROUTINE}.cpp
                   test/routines/level2/${ROUTINE}.hpp)
  endforeach()
  foreach(ROUTINE ${LEVEL3_ROUTINES})
    add_executable(clblast_test_${ROUTINE} ${TESTS_COMMON}
                   test/correctness/routines/level3/${ROUTINE}.cpp
                   test/routines/level3/${ROUTINE}.hpp)
  endforeach()
  foreach(ROUTINE ${LEVELX_ROUTINES})
    add_executable(clblast_test_${ROUTINE} ${TESTS_COMMON}
                   test/correctness/routines/levelx/${ROUTINE}.cpp
                   test/routines/levelx/${ROUTINE}.hpp)
  endforeach()

  # Links, installs, and registers (with CTest) each of the routine tests
  foreach(ROUTINE ${ROUTINES})
    target_link_libraries(clblast_test_${ROUTINE} clblast ${REF_LIBRARIES} ${API_LIBRARIES})
    install(TARGETS clblast_test_${ROUTINE} DESTINATION bin)
    target_include_directories(clblast_test_${ROUTINE} PUBLIC ${clblast_SOURCE_DIR} ${REF_INCLUDES})
    add_test(clblast_test_${ROUTINE} clblast_test_${ROUTINE})
  endforeach()

  # Miscellaneous tests (the preprocessor test is skipped for the CUDA API)
  set(MISC_TESTS override_parameters retrieve_parameters)
  if(NOT CUDA)
    set(MISC_TESTS ${MISC_TESTS} preprocessor)
  endif()
  if(MSVC)
    # Extra sources for the targets below; deliberately added only after the routine tests above
    set(TESTS_COMMON ${TESTS_COMMON} src/kernel_preprocessor.cpp src/utilities/compile.cpp)
  endif()
  foreach(MISC_TEST ${MISC_TESTS})
    add_executable(clblast_test_${MISC_TEST} ${TESTS_COMMON}
                   test/correctness/misc/${MISC_TEST}.cpp)
    target_link_libraries(clblast_test_${MISC_TEST} clblast ${REF_LIBRARIES} ${API_LIBRARIES})
    target_include_directories(clblast_test_${MISC_TEST} PUBLIC ${clblast_SOURCE_DIR} ${REF_INCLUDES})
    add_test(clblast_test_${MISC_TEST} clblast_test_${MISC_TEST})
  endforeach()

  # CLBlast diagnostics (built, but neither installed nor registered as a test)
  add_executable(clblast_test_diagnostics ${TESTS_COMMON} test/diagnostics.cpp)
  target_link_libraries(clblast_test_diagnostics clblast ${REF_LIBRARIES} ${API_LIBRARIES})
  target_include_directories(clblast_test_diagnostics PUBLIC
                             $<TARGET_PROPERTY:clblast,INTERFACE_INCLUDE_DIRECTORIES>
                             ${clblast_SOURCE_DIR} ${REF_INCLUDES})

  # Adds 'alltests' target: runs all routine tests.
  # ALLTESTS accumulates one 'COMMAND <test>' entry per routine and ALLTESTSDEPENDS accumulates
  # every routine-test target. The dependency list has to be appended to: a plain assignment would
  # only keep the test of the last loop iteration.
  set(ALLTESTS )
  set(ALLTESTSDEPENDS )
  foreach(ROUTINE ${ROUTINES})
    set(ALLTESTS ${ALLTESTS} COMMAND clblast_test_${ROUTINE})
    set(ALLTESTSDEPENDS ${ALLTESTSDEPENDS} clblast_test_${ROUTINE})
  endforeach()
  add_custom_target(alltests ${ALLTESTS} DEPENDS ${ALLTESTSDEPENDS})

endif()

# ==================================================================================================