Moved all headers into the source tree, changed headers to .hpp extension

author: Cedric Nugteren <web@cedricnugteren.nl> 2016-06-18 20:20:13 +0200
committer: Cedric Nugteren <web@cedricnugteren.nl> 2016-06-18 20:20:13 +0200
commit: f726fbdc9fef937fbe32222f0e66aac8d7e2678c (patch)
tree: cb62cc877ea239052fb1882f7bf327aace3e7776 /include/internal
parent: bacb5d2bb2ea7b141034878090aca850db8f9d00 (diff)
62 files changed, 0 insertions, 5992 deletions
diff --git a/include/internal/buffer_test.h b/include/internal/buffer_test.h
deleted file mode 100644
index 80f5243f..00000000
--- a/include/internal/buffer_test.h
+++ /dev/null
@@ -1,121 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-//   Cedric Nugteren <www.cedricnugteren.nl>
-//
-// This file implements the tests for the OpenCL buffers (matrices and vectors). These tests are
-// templated and thus header-only.
-//
-// =================================================================================================
-
-#ifndef CLBLAST_BUFFER_TEST_H_
-#define CLBLAST_BUFFER_TEST_H_
-
-#include "clblast.h"
-
-namespace clblast {
-// =================================================================================================
-
-// Validates matrix 'A': the leading dimension 'ld' may not be smaller than 'one', and the buffer
-// must be large enough for the last addressed element. Returns kSuccess when both checks pass.
-template <typename T>
-StatusCode TestMatrixA(const size_t one, const size_t two, const Buffer<T> &buffer,
-                       const size_t offset, const size_t ld) {
-  if (one > ld) { return StatusCode::kInvalidLeadDimA; }
-  try {
-    // Number of elements up to and including the last one: 'two - 1' strides of 'ld' elements,
-    // plus 'one' elements and the offset. This is unsigned arithmetic: 'two == 0' wraps around.
-    const auto num_elements = ld * (two - 1) + one + offset;
-    const auto min_bytes = num_elements * sizeof(T);
-    if (min_bytes > buffer.GetSize()) { return StatusCode::kInsufficientMemoryA; }
-  }
-  catch (...) {
-    // Anything thrown while inspecting the buffer marks the matrix itself as invalid
-    return StatusCode::kInvalidMatrixA;
-  }
-  return StatusCode::kSuccess;
-}
-
-// Validates matrix 'B': the leading dimension 'ld' may not be smaller than 'one', and the buffer
-// must be large enough for the last addressed element. Returns kSuccess when both checks pass.
-template <typename T>
-StatusCode TestMatrixB(const size_t one, const size_t two, const Buffer<T> &buffer,
-                       const size_t offset, const size_t ld) {
-  if (one > ld) { return StatusCode::kInvalidLeadDimB; }
-  try {
-    // Number of elements up to and including the last one: 'two - 1' strides of 'ld' elements,
-    // plus 'one' elements and the offset. This is unsigned arithmetic: 'two == 0' wraps around.
-    const auto num_elements = ld * (two - 1) + one + offset;
-    const auto min_bytes = num_elements * sizeof(T);
-    if (min_bytes > buffer.GetSize()) { return StatusCode::kInsufficientMemoryB; }
-  }
-  catch (...) {
-    // Anything thrown while inspecting the buffer marks the matrix itself as invalid
-    return StatusCode::kInvalidMatrixB;
-  }
-  return StatusCode::kSuccess;
-}
-
-// Validates matrix 'C': the leading dimension 'ld' may not be smaller than 'one', and the buffer
-// must be large enough for the last addressed element. Returns kSuccess when both checks pass.
-template <typename T>
-StatusCode TestMatrixC(const size_t one, const size_t two, const Buffer<T> &buffer,
-                       const size_t offset, const size_t ld) {
-  if (one > ld) { return StatusCode::kInvalidLeadDimC; }
-  try {
-    // Number of elements up to and including the last one: 'two - 1' strides of 'ld' elements,
-    // plus 'one' elements and the offset. This is unsigned arithmetic: 'two == 0' wraps around.
-    const auto num_elements = ld * (two - 1) + one + offset;
-    const auto min_bytes = num_elements * sizeof(T);
-    if (min_bytes > buffer.GetSize()) { return StatusCode::kInsufficientMemoryC; }
-  }
-  catch (...) {
-    // Anything thrown while inspecting the buffer marks the matrix itself as invalid
-    return StatusCode::kInvalidMatrixC;
-  }
-  return StatusCode::kSuccess;
-}
-
-// Validates matrix 'AP': the buffer must hold n*(n+1)/2 elements on top of the given offset.
-// Problems are reported through the status codes of matrix 'A'.
-template <typename T>
-StatusCode TestMatrixAP(const size_t n, const Buffer<T> &buffer, const size_t offset) {
-  try {
-    const auto num_elements = ((n * (n + 1)) / 2) + offset;
-    const auto min_bytes = num_elements * sizeof(T);
-    if (min_bytes > buffer.GetSize()) { return StatusCode::kInsufficientMemoryA; }
-  }
-  catch (...) {
-    // Anything thrown while inspecting the buffer marks the matrix itself as invalid
-    return StatusCode::kInvalidMatrixA;
-  }
-  return StatusCode::kSuccess;
-}
-
-// =================================================================================================
-
-// Validates vector 'X': the increment may not be zero, and the buffer must be large enough for
-// the last addressed element. Returns kSuccess when both checks pass.
-template <typename T>
-StatusCode TestVectorX(const size_t n, const Buffer<T> &buffer, const size_t offset,
-                       const size_t inc) {
-  if (inc == 0) { return StatusCode::kInvalidIncrementX; }
-  try {
-    // Index of the last element plus one: 'n - 1' steps of 'inc' elements beyond the offset. This
-    // is unsigned arithmetic, so 'n == 0' wraps around.
-    const auto num_elements = (n - 1) * inc + 1 + offset;
-    const auto min_bytes = num_elements * sizeof(T);
-    if (min_bytes > buffer.GetSize()) { return StatusCode::kInsufficientMemoryX; }
-  }
-  catch (...) {
-    // Anything thrown while inspecting the buffer marks the vector itself as invalid
-    return StatusCode::kInvalidVectorX;
-  }
-  return StatusCode::kSuccess;
-}
-
-// Validates vector 'Y': the increment may not be zero, and the buffer must be large enough for
-// the last addressed element. Returns kSuccess when both checks pass.
-template <typename T>
-StatusCode TestVectorY(const size_t n, const Buffer<T> &buffer, const size_t offset,
-                       const size_t inc) {
-  if (inc == 0) { return StatusCode::kInvalidIncrementY; }
-  try {
-    // Index of the last element plus one: 'n - 1' steps of 'inc' elements beyond the offset. This
-    // is unsigned arithmetic, so 'n == 0' wraps around.
-    const auto num_elements = (n - 1) * inc + 1 + offset;
-    const auto min_bytes = num_elements * sizeof(T);
-    if (min_bytes > buffer.GetSize()) { return StatusCode::kInsufficientMemoryY; }
-  }
-  catch (...) {
-    // Anything thrown while inspecting the buffer marks the vector itself as invalid
-    return StatusCode::kInvalidVectorY;
-  }
-  return StatusCode::kSuccess;
-}
-
-// =================================================================================================
-
-// Validates vector 'scalar': the buffer must hold 'n' elements on top of the given offset
-template <typename T>
-StatusCode TestVectorScalar(const size_t n, const Buffer<T> &buffer, const size_t offset) {
-  try {
-    const auto min_bytes = (n + offset) * sizeof(T);
-    if (min_bytes > buffer.GetSize()) { return StatusCode::kInsufficientMemoryScalar; }
-  }
-  catch (...) {
-    // Anything thrown while inspecting the buffer marks the vector itself as invalid
-    return StatusCode::kInvalidVectorScalar;
-  }
-  return StatusCode::kSuccess;
-}
-
-// Validates vector 'index': the buffer must hold 'n' elements on top of the given offset. Note
-// that problems are reported through the status codes of the 'scalar' vector.
-template <typename T>
-StatusCode TestVectorIndex(const size_t n, const Buffer<T> &buffer, const size_t offset) {
-  try {
-    const auto min_bytes = (n + offset) * sizeof(T);
-    if (min_bytes > buffer.GetSize()) { return StatusCode::kInsufficientMemoryScalar; }
-  }
-  catch (...) {
-    // Anything thrown while inspecting the buffer marks the vector itself as invalid
-    return StatusCode::kInvalidVectorScalar;
-  }
-  return StatusCode::kSuccess;
-}
-
-// =================================================================================================
-} // namespace clblast
-
-// CLBLAST_BUFFER_TEST_H_
-#endif
diff --git a/include/internal/cache.h b/include/internal/cache.h
deleted file mode 100644
index bc7e87d9..00000000
--- a/include/internal/cache.h
+++ /dev/null
@@ -1,98 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-//   Cedric Nugteren <www.cedricnugteren.nl>
-//
-// This file implements the caching functionality of compiled binaries and programs.
-//
-// =================================================================================================
-
-#ifndef CLBLAST_CACHE_H_
-#define CLBLAST_CACHE_H_
-
-#include <string>
-#include <vector>
-#include <mutex>
-
-#include "internal/utilities.h"
-
-namespace clblast {
-// =================================================================================================
-
-// One entry of the cache of compiled OpenCL binaries: the binary plus the meta-data (device name,
-// precision, and routine name) that 'MatchInCache' compares against
-struct BinaryCache {
-  std::string binary;
-  std::string device_name;
-  Precision precision;
-  std::string routine_name_;
-
-  // Finds out whether all three properties equal the given reference values. This only inspects
-  // the entry and is therefore 'const', so that it can also be called on const cache entries.
-  bool MatchInCache(const std::string &ref_device, const Precision &ref_precision,
-                    const std::string &ref_routine) const {
-    return (device_name == ref_device &&
-            precision == ref_precision &&
-            routine_name_ == ref_routine);
-  }
-};
-
-// The actual cache, implemented as a vector of the above data-type, and its mutex.
-// NOTE(review): both objects are declared 'static' in a header, which gives every translation unit
-// that includes this file its own separate copy — confirm that only a single source file is
-// meant to access them.
-static std::vector<BinaryCache> binary_cache_;
-static std::mutex binary_cache_mutex_;
-
-// =================================================================================================
-
-// One entry of the cache of compiled OpenCL programs: the program plus the meta-data (context
-// pointer, precision, and routine name) that 'MatchInCache' compares against
-struct ProgramCache {
-  Program program;
-  ContextPointer context_ptr;
-  Precision precision;
-  std::string routine_name_;
-
-  // Finds out whether all three properties equal the given reference values. This only inspects
-  // the entry and is therefore 'const', so that it can also be called on const cache entries.
-  bool MatchInCache(const ContextPointer ref_context, const Precision &ref_precision,
-                    const std::string &ref_routine) const {
-    return (context_ptr == ref_context &&
-            precision == ref_precision &&
-            routine_name_ == ref_routine);
-  }
-};
-
-// The actual cache, implemented as a vector of the above data-type, and its mutex.
-// NOTE(review): both objects are declared 'static' in a header, which gives every translation unit
-// that includes this file its own separate copy — confirm that only a single source file is
-// meant to access them.
-static std::vector<ProgramCache> program_cache_;
-static std::mutex program_cache_mutex_;
-
-// =================================================================================================
-
-// Stores the compiled binary or program in the cache. The remaining arguments (device name or
-// context, precision, and routine name) presumably become the meta-data that the entry is later
-// matched against — the implementation is not part of this header.
-void StoreBinaryToCache(const std::string &binary, const std::string &device_name,
-                        const Precision &precision, const std::string &routine_name);
-void StoreProgramToCache(const Program &program, const Context &context,
-                         const Precision &precision, const std::string &routine_name);
-
-// Queries the cache and retrieves a matching binary or program. Assumes that the match is
-// available, throws otherwise. The result is returned by const-reference, so nothing is copied
-// by the call itself.
-const std::string& GetBinaryFromCache(const std::string &device_name, const Precision &precision,
-                                      const std::string &routine_name);
-const Program& GetProgramFromCache(const Context &context, const Precision &precision,
-                                   const std::string &routine_name);
-
-// Queries the cache to see whether or not the compiled kernel is already there. These take the
-// same arguments as the 'Get...FromCache' functions above.
-bool BinaryIsInCache(const std::string &device_name, const Precision &precision,
-                     const std::string &routine_name);
-bool ProgramIsInCache(const Context &context, const Precision &precision,
-                      const std::string &routine_name);
-
-// =================================================================================================
-
-// Clears the cache of stored binaries and reports the outcome as a status code.
-// NOTE(review): whether the program cache is cleared as well cannot be told from this header.
-StatusCode CacheClearAll();
-
-// =================================================================================================
-} // namespace clblast
-
-// CLBLAST_CACHE_H_
-#endif
diff --git a/include/internal/clpp11.h b/include/internal/clpp11.h
deleted file mode 100644
index b834d8b4..00000000
--- a/include/internal/clpp11.h
+++ /dev/null
@@ -1,695 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-//   Cedric Nugteren <www.cedricnugteren.nl>
-//
-// This file implements a bunch of C++11 classes that act as wrappers around OpenCL objects and API
-// calls. The main benefits are increased abstraction, automatic memory management, and portability.
-// Portability here means that a similar header exists for CUDA with the same classes and
-// interfaces. In other words, moving from the OpenCL API to the CUDA API becomes a one-line change.
-//
-// This file is taken from the Claduc project <https://github.com/CNugteren/Claduc> and therefore
-// contains the following header copyright notice:
-//
-// =================================================================================================
-//
-// Copyright 2015 SURFsara
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// 
-//  http://www.apache.org/licenses/LICENSE-2.0
-// 
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-// =================================================================================================
-
-#ifndef CLBLAST_CLPP11_H_
-#define CLBLAST_CLPP11_H_
-
-// C++
-#include <algorithm> // std::copy
-#include <string>    // std::string
-#include <vector>    // std::vector
-#include <memory>    // std::shared_ptr
-#include <stdexcept> // std::runtime_error
-#include <numeric>   // std::accumulate
-
-// OpenCL
-#if defined(__APPLE__) || defined(__MACOSX)
-  #include <OpenCL/opencl.h>
-#else
-  #include <CL/opencl.h>
-#endif
-
-namespace clblast {
-// =================================================================================================
-
-// Reports an error detected by the C++11 OpenCL header (this file) itself: always throws a
-// std::runtime_error carrying the given message
-inline void Error(const std::string &message) {
-  const auto text = std::string{"Internal OpenCL error: "} + message;
-  throw std::runtime_error(text);
-}
-
-// Turns a failing OpenCL status into an exception: does nothing for CL_SUCCESS, and otherwise
-// throws a std::runtime_error containing the numeric status code
-inline void CheckError(const cl_int status) {
-  if (status == CL_SUCCESS) { return; }
-  throw std::runtime_error("Internal OpenCL error: "+std::to_string(status));
-}
-
-// =================================================================================================
-
-// C++11 version of 'cl_event'
-class Event {
- public:
-
-  // Constructor based on the regular OpenCL data-type
-  explicit Event(const cl_event event): event_(event) { }
-
-  // Regular constructor: starts out without an underlying OpenCL event
-  explicit Event(): event_(nullptr) { }
-
-  // Waits for completion of this event. Throws when OpenCL reports an error.
-  void WaitForCompletion() const {
-    CheckError(clWaitForEvents(1, &event_));
-  }
-
-  // Retrieves the elapsed time of the last recorded event: the difference between the end and
-  // start profiling counters, multiplied by 1.0e-6. OpenCL documents these counters as 'cl_ulong'
-  // values in nanoseconds, so the result is in milliseconds. They are read into 'cl_ulong'
-  // variables of exactly the queried size: reading them into a 'size_t' would overflow the
-  // destination on platforms where 'size_t' is only 32 bits wide. Note that no error checking is
-  // done on the 'clGetEventProfilingInfo' function, since there is a bug in Apple's OpenCL
-  // implementation:
-  // http://stackoverflow.com/questions/26145603/clgeteventprofilinginfo-bug-in-macosx
-  float GetElapsedTime() const {
-    WaitForCompletion();
-    const auto bytes = sizeof(cl_ulong);
-    auto time_start = cl_ulong{0};
-    clGetEventProfilingInfo(event_, CL_PROFILING_COMMAND_START, bytes, &time_start, nullptr);
-    auto time_end = cl_ulong{0};
-    clGetEventProfilingInfo(event_, CL_PROFILING_COMMAND_END, bytes, &time_end, nullptr);
-    return static_cast<float>(time_end - time_start) * 1.0e-6f;
-  }
-
-  // Accessors to the private data-member: by reference and by pointer, such that OpenCL calls can
-  // write a new event into this object
-  cl_event& operator()() { return event_; }
-  cl_event* pointer() { return &event_; }
- private:
-  cl_event event_;
-};
-
-// Pointer to an OpenCL event
-using EventPointer = cl_event*;
-
-// =================================================================================================
-
-// C++11 version of 'cl_platform_id'
-class Platform {
- public:
-
-  // Wraps an already available OpenCL platform
-  explicit Platform(const cl_platform_id platform): platform_(platform) { }
-
-  // Selects the platform with index 'platform_id' from the list reported by OpenCL. Throws when
-  // there are no platforms, when the index is out of range, or when an OpenCL call fails.
-  explicit Platform(const size_t platform_id) {
-
-    // First query: the number of available platforms
-    cl_uint count = 0;
-    CheckError(clGetPlatformIDs(0, nullptr, &count));
-    if (count == 0) { Error("no platforms found"); }
-
-    // Second query: the platforms themselves
-    std::vector<cl_platform_id> all_platforms(count);
-    CheckError(clGetPlatformIDs(count, all_platforms.data(), nullptr));
-    if (!(platform_id < count)) { Error("invalid platform ID "+std::to_string(platform_id)); }
-    platform_ = all_platforms[platform_id];
-  }
-
-  // Counts the devices of all types on this platform
-  size_t NumDevices() const {
-    cl_uint count = 0;
-    CheckError(clGetDeviceIDs(platform_, CL_DEVICE_TYPE_ALL, 0, nullptr, &count));
-    return static_cast<size_t>(count);
-  }
-
-  // Read-only access to the underlying OpenCL platform
-  const cl_platform_id& operator()() const { return platform_; }
- private:
-  cl_platform_id platform_;
-};
-
-// =================================================================================================
-
-// C++11 version of 'cl_device_id'
-class Device {
- public:
-
-  // Constructor based on the regular OpenCL data-type
-  explicit Device(const cl_device_id device): device_(device) { }
-
-  // Initialize the device as the one with index 'device_id' on the given platform. Note that this
-  // constructor can throw exceptions: when the platform has no devices, when the index is out of
-  // range, or when an OpenCL call fails.
-  explicit Device(const Platform &platform, const size_t device_id) {
-    auto num_devices = platform.NumDevices();
-    if (num_devices == 0) { Error("no devices found"); }
-    auto devices = std::vector<cl_device_id>(num_devices);
-    CheckError(clGetDeviceIDs(platform(), CL_DEVICE_TYPE_ALL, static_cast<cl_uint>(num_devices),
-                              devices.data(), nullptr));
-    if (device_id >= num_devices) { Error("invalid device ID "+std::to_string(device_id)); }
-    device_ = devices[device_id];
-  }
-
-  // Methods to retrieve device information
-  std::string Version() const { return GetInfoString(CL_DEVICE_VERSION); }
-  std::string Vendor() const { return GetInfoString(CL_DEVICE_VENDOR); }
-  std::string Name() const { return GetInfoString(CL_DEVICE_NAME); }
-  std::string Type() const {
-    auto type = GetInfo<cl_device_type>(CL_DEVICE_TYPE);
-    switch(type) {
-      case CL_DEVICE_TYPE_CPU: return "CPU";
-      case CL_DEVICE_TYPE_GPU: return "GPU";
-      case CL_DEVICE_TYPE_ACCELERATOR: return "accelerator";
-      default: return "default";
-    }
-  }
-  size_t MaxWorkGroupSize() const { return GetInfo<size_t>(CL_DEVICE_MAX_WORK_GROUP_SIZE); }
-  size_t MaxWorkItemDimensions() const {
-    return GetInfo(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS);
-  }
-  std::vector<size_t> MaxWorkItemSizes() const {
-    return GetInfoVector<size_t>(CL_DEVICE_MAX_WORK_ITEM_SIZES);
-  }
-  size_t LocalMemSize() const {
-    return static_cast<size_t>(GetInfo<cl_ulong>(CL_DEVICE_LOCAL_MEM_SIZE));
-  }
-  std::string Capabilities() const { return GetInfoString(CL_DEVICE_EXTENSIONS); }
-  size_t CoreClock() const { return GetInfo(CL_DEVICE_MAX_CLOCK_FREQUENCY); }
-  size_t ComputeUnits() const { return GetInfo(CL_DEVICE_MAX_COMPUTE_UNITS); }
-
-  // The two memory sizes below are 'cl_ulong' properties in OpenCL. They are therefore queried
-  // through the templated helper: the non-templated 'GetInfo' reads into a 'cl_uint', which is
-  // too small to receive them.
-  size_t MemorySize() const {
-    return static_cast<size_t>(GetInfo<cl_ulong>(CL_DEVICE_GLOBAL_MEM_SIZE));
-  }
-  size_t MaxAllocSize() const {
-    return static_cast<size_t>(GetInfo<cl_ulong>(CL_DEVICE_MAX_MEM_ALLOC_SIZE));
-  }
-  size_t MemoryClock() const { return 0; } // Not exposed in OpenCL
-  size_t MemoryBusWidth() const { return 0; } // Not exposed in OpenCL
-
-  // Configuration-validity checks
-  bool IsLocalMemoryValid(const size_t local_mem_usage) const {
-    return (local_mem_usage <= LocalMemSize());
-  }
-
-  // Checks a local thread configuration against the device limits: the number of dimensions, the
-  // size of each individual dimension, and the total number of threads (the product of all sizes)
-  bool IsThreadConfigValid(const std::vector<size_t> &local) const {
-
-    // The number of dimensions is verified first, such that the loop below never looks beyond the
-    // per-dimension limits reported by the device
-    if (local.size() > MaxWorkItemDimensions()) { return false; }
-
-    // The per-dimension limits are queried once rather than once per loop iteration
-    const auto max_sizes = MaxWorkItemSizes();
-    auto local_size = size_t{1};
-    for (auto i=size_t{0}; i<local.size(); ++i) {
-      if (i >= max_sizes.size() || local[i] > max_sizes[i]) { return false; }
-      local_size *= local[i];
-    }
-    return (local_size <= MaxWorkGroupSize());
-  }
-
-  // Query for a specific type of device or brand
-  bool IsCPU() const { return Type() == "CPU"; }
-  bool IsGPU() const { return Type() == "GPU"; }
-  bool IsAMD() const { return Vendor() == "AMD" || Vendor() == "Advanced Micro Devices, Inc."; }
-  bool IsARM() const { return Vendor() == "ARM"; }
-
-  // Accessor to the private data-member
-  const cl_device_id& operator()() const { return device_; }
- private:
-  cl_device_id device_;
-
-  // Private helper functions. Each of them first asks OpenCL for the size of the property in
-  // bytes and then retrieves that many bytes.
-
-  // Retrieves a property of type T. The caller has to select a T that is at least as large as
-  // the queried property, since the full property is written into a single T.
-  template <typename T>
-  T GetInfo(const cl_device_info info) const {
-    auto bytes = size_t{0};
-    CheckError(clGetDeviceInfo(device_, info, 0, nullptr, &bytes));
-    auto result = T(0);
-    CheckError(clGetDeviceInfo(device_, info, bytes, &result, nullptr));
-    return result;
-  }
-
-  // As above, but fixed to properties that fit in a 'cl_uint' and with the result widened to
-  // 'size_t'
-  size_t GetInfo(const cl_device_info info) const {
-    auto bytes = size_t{0};
-    CheckError(clGetDeviceInfo(device_, info, 0, nullptr, &bytes));
-    auto result = cl_uint(0);
-    CheckError(clGetDeviceInfo(device_, info, bytes, &result, nullptr));
-    return static_cast<size_t>(result);
-  }
-
-  // Retrieves an array-valued property as a vector with 'bytes/sizeof(T)' elements
-  template <typename T>
-  std::vector<T> GetInfoVector(const cl_device_info info) const {
-    auto bytes = size_t{0};
-    CheckError(clGetDeviceInfo(device_, info, 0, nullptr, &bytes));
-    auto result = std::vector<T>(bytes/sizeof(T));
-    CheckError(clGetDeviceInfo(device_, info, bytes, result.data(), nullptr));
-    return result;
-  }
-
-  // Retrieves a string-valued property
-  std::string GetInfoString(const cl_device_info info) const {
-    auto bytes = size_t{0};
-    CheckError(clGetDeviceInfo(device_, info, 0, nullptr, &bytes));
-    auto result = std::string{};
-    result.resize(bytes);
-    CheckError(clGetDeviceInfo(device_, info, bytes, &result[0], nullptr));
-    return std::string{result.c_str()}; // Removes any trailing '\0'-characters
-  }
-};
-
-// =================================================================================================
-
-// C++11 version of 'cl_context'. The OpenCL handle is kept in a shared pointer, so copies of
-// this class share one and the same handle.
-class Context {
- public:
-
-  // Constructor based on the regular OpenCL data-type: memory management is handled elsewhere,
-  // so the context is not released when the last copy of this object goes away
-  explicit Context(const cl_context context):
-      context_(new cl_context(context)) {
-  }
-
-  // Regular constructor with memory management: the context is released together with the last
-  // copy of this object. Throws when the context cannot be created. The handle starts out as
-  // 'nullptr' and the deleter skips such handles, so a failed creation never releases an
-  // uninitialized value. The deleter does not throw either: it runs from within a destructor,
-  // and a shared-pointer deleter is required not to throw. A failing release is thus ignored.
-  explicit Context(const Device &device):
-      context_(new cl_context(nullptr), [](cl_context* c) {
-        if (*c != nullptr) { clReleaseContext(*c); }
-        delete c;
-      }) {
-    auto status = CL_SUCCESS;
-    const cl_device_id dev = device();
-    *context_ = clCreateContext(nullptr, 1, &dev, nullptr, nullptr, &status);
-    CheckError(status);
-  }
-
-  // Accessors to the private data-member: by reference and by pointer
-  const cl_context& operator()() const { return *context_; }
-  cl_context* pointer() const { return context_.get(); }
- private:
-  std::shared_ptr<cl_context> context_;
-};
-
-// Pointer to an OpenCL context
-using ContextPointer = cl_context*;
-
-// =================================================================================================
-
-// Enumeration of build statuses of the run-time compilation process
-enum class BuildStatus { kSuccess, kError, kInvalid };
-
-// C++11 version of 'cl_program'. Additionally holds the program's source code (or binary).
-// NOTE(review): 'source_ptr_' points into this object's own 'source_' string and is copied
-// as-is by the implicit copy operations. In the code visible here it is only used inside the
-// constructors, but confirm this before using it elsewhere.
-class Program {
- public:
-  // Note that there is no constructor based on the regular OpenCL data-type because of extra state
-
-  // Source-based constructor with memory management. Throws when the program cannot be created.
-  // As for all constructors of this class, the handle starts out as 'nullptr' and the deleter
-  // skips such handles, so a failed creation never releases an uninitialized value. The deleter
-  // does not throw: it runs from within a destructor, and a shared-pointer deleter is required
-  // not to throw. A failing release is thus ignored.
-  explicit Program(const Context &context, std::string source):
-      program_(new cl_program(nullptr), [](cl_program* p) {
-        if (*p != nullptr) { clReleaseProgram(*p); }
-        delete p;
-      }),
-      length_(source.length()),
-      source_(std::move(source)),
-      source_ptr_(&source_[0]) {
-    auto status = CL_SUCCESS;
-    *program_ = clCreateProgramWithSource(context(), 1, &source_ptr_, &length_, &status);
-    CheckError(status);
-  }
-
-  // Binary-based constructor with memory management. Throws when either the binary is rejected
-  // for the device (first status) or the program cannot be created (second status).
-  explicit Program(const Device &device, const Context &context, const std::string& binary):
-      program_(new cl_program(nullptr), [](cl_program* p) {
-        if (*p != nullptr) { clReleaseProgram(*p); }
-        delete p;
-      }),
-      length_(binary.length()),
-      source_(binary),
-      source_ptr_(&source_[0]) {
-    auto status1 = CL_SUCCESS;
-    auto status2 = CL_SUCCESS;
-    const cl_device_id dev = device();
-    *program_ = clCreateProgramWithBinary(context(), 1, &dev, &length_,
-                                          reinterpret_cast<const unsigned char**>(&source_ptr_),
-                                          &status1, &status2);
-    CheckError(status1);
-    CheckError(status2);
-  }
-
-  // Compiles the device program and returns whether or not there were any warnings/errors. The
-  // options are concatenated as-is behind a single leading space: no separators are inserted in
-  // between. Returns 'kError' for a build failure and 'kInvalid' for an invalid binary. Any
-  // other failing status results in an exception.
-  BuildStatus Build(const Device &device, std::vector<std::string> &options) {
-    auto options_string = std::accumulate(options.begin(), options.end(), std::string{" "});
-    const cl_device_id dev = device();
-    auto status = clBuildProgram(*program_, 1, &dev, options_string.c_str(), nullptr, nullptr);
-    if (status == CL_BUILD_PROGRAM_FAILURE) {
-      return BuildStatus::kError;
-    }
-    else if (status == CL_INVALID_BINARY) {
-      return BuildStatus::kInvalid;
-    }
-    else {
-      CheckError(status);
-      return BuildStatus::kSuccess;
-    }
-  }
-
-  // Retrieves the warning/error message from the compiler (if any)
-  std::string GetBuildInfo(const Device &device) const {
-    auto bytes = size_t{0};
-    auto query = cl_program_build_info{CL_PROGRAM_BUILD_LOG};
-    CheckError(clGetProgramBuildInfo(*program_, device(), query, 0, nullptr, &bytes));
-    auto result = std::string{};
-    result.resize(bytes);
-    CheckError(clGetProgramBuildInfo(*program_, device(), query, bytes, &result[0], nullptr));
-    return result;
-  }
-
-  // Retrieves a binary or an intermediate representation of the compiled program. Room for a
-  // single binary is queried and reserved, which matches the single device used to create and
-  // build programs in this class.
-  std::string GetIR() const {
-    auto bytes = size_t{0};
-    CheckError(clGetProgramInfo(*program_, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &bytes, nullptr));
-    auto result = std::string{};
-    result.resize(bytes);
-    char* result_ptr = &result[0]; // writable: OpenCL stores the binary through this pointer
-    CheckError(clGetProgramInfo(*program_, CL_PROGRAM_BINARIES, sizeof(char*), &result_ptr, nullptr));
-    return result;
-  }
-
-  // Accessor to the private data-member
-  const cl_program& operator()() const { return *program_; }
- private:
-  std::shared_ptr<cl_program> program_;
-  size_t length_;
-  std::string source_; // Note: the source can also be a binary or IR
-  const char* source_ptr_;
-};
-
-// =================================================================================================
-
-// C++11 version of 'cl_command_queue'. The OpenCL handle is kept in a shared pointer, so copies
-// of this class share one and the same handle.
-class Queue {
- public:
-
-  // Constructor based on the regular OpenCL data-type: memory management is handled elsewhere,
-  // so the queue is not released when the last copy of this object goes away
-  explicit Queue(const cl_command_queue queue):
-      queue_(new cl_command_queue(queue)) {
-  }
-
-  // Regular constructor with memory management: creates a queue with profiling enabled, which is
-  // released together with the last copy of this object. Throws when the queue cannot be
-  // created. The handle starts out as 'nullptr' and the deleter skips such handles, so a failed
-  // creation never releases an uninitialized value. The deleter does not throw either: it runs
-  // from within a destructor, and a shared-pointer deleter is required not to throw. A failing
-  // release is thus ignored.
-  explicit Queue(const Context &context, const Device &device):
-      queue_(new cl_command_queue(nullptr), [](cl_command_queue* s) {
-        if (*s != nullptr) { clReleaseCommandQueue(*s); }
-        delete s;
-      }) {
-    auto status = CL_SUCCESS;
-    #ifdef CL_VERSION_2_0
-      cl_queue_properties properties[] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0};
-      *queue_ = clCreateCommandQueueWithProperties(context(), device(), properties, &status);
-    #else
-      *queue_ = clCreateCommandQueue(context(), device(), CL_QUEUE_PROFILING_ENABLE, &status);
-    #endif
-    CheckError(status);
-  }
-
-  // Synchronizes the queue. The event argument of the first version is not used: both versions
-  // wait for the entire queue to finish.
-  void Finish(Event &) const {
-    Finish();
-  }
-  void Finish() const {
-    CheckError(clFinish(*queue_));
-  }
-
-  // Retrieves the corresponding context or device. Both are wrapped using the constructors based
-  // on the regular OpenCL data-types.
-  Context GetContext() const {
-    auto bytes = size_t{0};
-    CheckError(clGetCommandQueueInfo(*queue_, CL_QUEUE_CONTEXT, 0, nullptr, &bytes));
-    cl_context result;
-    CheckError(clGetCommandQueueInfo(*queue_, CL_QUEUE_CONTEXT, bytes, &result, nullptr));
-    return Context(result);
-  }
-  Device GetDevice() const {
-    auto bytes = size_t{0};
-    CheckError(clGetCommandQueueInfo(*queue_, CL_QUEUE_DEVICE, 0, nullptr, &bytes));
-    cl_device_id result;
-    CheckError(clGetCommandQueueInfo(*queue_, CL_QUEUE_DEVICE, bytes, &result, nullptr));
-    return Device(result);
-  }
-
-  // Accessor to the private data-member
-  const cl_command_queue& operator()() const { return *queue_; }
- private:
-  std::shared_ptr<cl_command_queue> queue_;
-};
-
-// =================================================================================================
-
-// C++11 version of host memory: a std::vector kept in a shared pointer, so copies of this class
-// share one and the same storage
-template <typename T>
-class BufferHost {
- public:
-
-  // Regular constructor with memory management: allocates 'size' elements. The context argument
-  // is not used.
-  explicit BufferHost(const Context &, const size_t size):
-      buffer_(new std::vector<T>(size)) {
-  }
-
-  // Retrieves the actual allocated size in bytes
-  size_t GetSize() const {
-    return buffer_->size()*sizeof(T);
-  }
-
-  // Compatibility with std::vector. As for std::vector, 'end' points one past the last element,
-  // so that 'end() - begin()' equals 'size()'. Both 'begin' and 'end' go through 'data()', which
-  // is also valid for an empty buffer.
-  size_t size() const { return buffer_->size(); }
-  T* begin() { return buffer_->data(); }
-  T* end() { return buffer_->data() + buffer_->size(); }
-  T& operator[](const size_t i) { return (*buffer_)[i]; }
-  T* data() { return buffer_->data(); }
-  const T* data() const { return buffer_->data(); }
-
- private:
-  std::shared_ptr<std::vector<T>> buffer_;
-};
-
-// =================================================================================================
-
-// Enumeration of buffer access types
-enum class BufferAccess { kReadOnly, kWriteOnly, kReadWrite, kNotOwned };
-
-// C++11 version of 'cl_mem'
-template <typename T>
-class Buffer {
- public:
-
-  // Constructor based on the regular OpenCL data-type: memory management is handled elsewhere
-  explicit Buffer(const cl_mem buffer):
-      buffer_(new cl_mem),
-      access_(BufferAccess::kNotOwned) {
-    *buffer_ = buffer;
-  }
-
-  // Regular constructor with memory management. If this class does not own the buffer object, then
-  // the memory will not be freed automatically afterwards.
-  explicit Buffer(const Context &context, const BufferAccess access, const size_t size):
-      buffer_(new cl_mem, [access](cl_mem* m) {
-        if (access != BufferAccess::kNotOwned) { CheckError(clReleaseMemObject(*m)); }
-        delete m;
-      }),
-      access_(access) {
-    auto flags = cl_mem_flags{CL_MEM_READ_WRITE};
-    if (access_ == BufferAccess::kReadOnly) { flags = CL_MEM_READ_ONLY; }
-    if (access_ == BufferAccess::kWriteOnly) { flags = CL_MEM_WRITE_ONLY; }
-    auto status = CL_SUCCESS;
-    *buffer_ = clCreateBuffer(context(), flags, size*sizeof(T), nullptr, &status);
-    CheckError(status);
-  }
-
-  // As above, but now with read/write access as a default
-  explicit Buffer(const Context &context, const size_t size):
-    Buffer<T>(context, BufferAccess::kReadWrite, size) {
-  }
-
-  // Constructs a new buffer based on an existing host-container
-  template <typename Iterator>
-  explicit Buffer(const Context &context, const Queue &queue, Iterator start, Iterator end):
-    Buffer(context, BufferAccess::kReadWrite, static_cast<size_t>(end - start)) {
-    auto size = static_cast<size_t>(end - start);
-    auto pointer = &*start;
-    CheckError(clEnqueueWriteBuffer(queue(), *buffer_, CL_FALSE, 0, size*sizeof(T), pointer, 0,
-                                    nullptr, nullptr));
-    queue.Finish();
-  }
-
-  // Copies from device to host: reading the device buffer asynchronously
-  void ReadAsync(const Queue &queue, const size_t size, T* host, const size_t offset = 0) const {
-    if (access_ == BufferAccess::kWriteOnly) { Error("reading from a write-only buffer"); }
-    CheckError(clEnqueueReadBuffer(queue(), *buffer_, CL_FALSE, offset*sizeof(T), size*sizeof(T),
-                                   host, 0, nullptr, nullptr));
-  }
-  void ReadAsync(const Queue &queue, const size_t size, std::vector<T> &host,
-                 const size_t offset = 0) const {
-    if (host.size() < size) { Error("target host buffer is too small"); }
-    ReadAsync(queue, size, host.data(), offset);
-  }
-  void ReadAsync(const Queue &queue, const size_t size, BufferHost<T> &host,
-                 const size_t offset = 0) const {
-    if (host.size() < size) { Error("target host buffer is too small"); }
-    ReadAsync(queue, size, host.data(), offset);
-  }
-
-  // Copies from device to host: reading the device buffer
-  void Read(const Queue &queue, const size_t size, T* host, const size_t offset = 0) const {
-    ReadAsync(queue, size, host, offset);
-    queue.Finish();
-  }
-  void Read(const Queue &queue, const size_t size, std::vector<T> &host,
-            const size_t offset = 0) const {
-    Read(queue, size, host.data(), offset);
-  }
-  void Read(const Queue &queue, const size_t size, BufferHost<T> &host,
-            const size_t offset = 0) const {
-    Read(queue, size, host.data(), offset);
-  }
-
-  // Copies from host to device: writing the device buffer asynchronously
-  void WriteAsync(const Queue &queue, const size_t size, const T* host, const size_t offset = 0) {
-    if (access_ == BufferAccess::kReadOnly) { Error("writing to a read-only buffer"); }
-    if (GetSize() < (offset+size)*sizeof(T)) { Error("target device buffer is too small"); }
-    CheckError(clEnqueueWriteBuffer(queue(), *buffer_, CL_FALSE, offset*sizeof(T), size*sizeof(T),
-                                    host, 0, nullptr, nullptr));
-  }
-  void WriteAsync(const Queue &queue, const size_t size, const std::vector<T> &host,
-                  const size_t offset = 0) {
-    WriteAsync(queue, size, host.data(), offset);
-  }
-  void WriteAsync(const Queue &queue, const size_t size, const BufferHost<T> &host,
-                  const size_t offset = 0) {
-    WriteAsync(queue, size, host.data(), offset);
-  }
-
-  // Copies from host to device: writing the device buffer
-  void Write(const Queue &queue, const size_t size, const T* host, const size_t offset = 0) {
-    WriteAsync(queue, size, host, offset);
-    queue.Finish();
-  }
-  void Write(const Queue &queue, const size_t size, const std::vector<T> &host,
-             const size_t offset = 0) {
-    Write(queue, size, host.data(), offset);
-  }
-  void Write(const Queue &queue, const size_t size, const BufferHost<T> &host,
-             const size_t offset = 0) {
-    Write(queue, size, host.data(), offset);
-  }
-
-  // Copies the contents of this buffer into another device buffer
-  void CopyToAsync(const Queue &queue, const size_t size, const Buffer<T> &destination) const {
-    CheckError(clEnqueueCopyBuffer(queue(), *buffer_, destination(), 0, 0, size*sizeof(T), 0,
-                                   nullptr, nullptr));
-  }
-  void CopyTo(const Queue &queue, const size_t size, const Buffer<T> &destination) const {
-    CopyToAsync(queue, size, destination);
-    queue.Finish();
-  }
-
-  // Retrieves the actual allocated size in bytes
-  size_t GetSize() const {
-    auto bytes = size_t{0};
-    CheckError(clGetMemObjectInfo(*buffer_, CL_MEM_SIZE, 0, nullptr, &bytes));
-    auto result = size_t{0};
-    CheckError(clGetMemObjectInfo(*buffer_, CL_MEM_SIZE, bytes, &result, nullptr));
-    return result;
-  }
-
-  // Accessor to the private data-member
-  const cl_mem& operator()() const { return *buffer_; }
- private:
-  std::shared_ptr<cl_mem> buffer_;
-  const BufferAccess access_;
-};
-
-// =================================================================================================
-
-// C++11 version of 'cl_kernel'
-class Kernel {
- public:
-
-  // Constructor based on the regular OpenCL data-type: memory management is handled elsewhere
-  explicit Kernel(const cl_kernel kernel):
-      kernel_(new cl_kernel) {
-    *kernel_ = kernel;
-  }
-
-  // Regular constructor with memory management
-  explicit Kernel(const Program &program, const std::string &name):
-      kernel_(new cl_kernel, [](cl_kernel* k) { CheckError(clReleaseKernel(*k)); delete k; }) {
-    auto status = CL_SUCCESS;
-    *kernel_ = clCreateKernel(program(), name.c_str(), &status);
-    CheckError(status);
-  }
-
-  // Sets a kernel argument at the indicated position
-  template <typename T>
-  void SetArgument(const size_t index, const T &value) {
-    CheckError(clSetKernelArg(*kernel_, static_cast<cl_uint>(index), sizeof(T), &value));
-  }
-  template <typename T>
-  void SetArgument(const size_t index, Buffer<T> &value) {
-    SetArgument(index, value());
-  }
-
-  // Sets all arguments in one go using parameter packs. Note that this overwrites previously set
-  // arguments using 'SetArgument' or 'SetArguments'.
-  template <typename... Args>
-  void SetArguments(Args&... args) {
-    SetArgumentsRecursive(0, args...);
-  }
-
-  // Retrieves the amount of local memory used per work-group for this kernel
-  size_t LocalMemUsage(const Device &device) const {
-    auto bytes = size_t{0};
-    auto query = cl_kernel_work_group_info{CL_KERNEL_LOCAL_MEM_SIZE};
-    CheckError(clGetKernelWorkGroupInfo(*kernel_, device(), query, 0, nullptr, &bytes));
-    auto result = size_t{0};
-    CheckError(clGetKernelWorkGroupInfo(*kernel_, device(), query, bytes, &result, nullptr));
-    return result;
-  }
-
-  // Launches a kernel onto the specified queue
-  void Launch(const Queue &queue, const std::vector<size_t> &global,
-              const std::vector<size_t> &local, EventPointer event) {
-    CheckError(clEnqueueNDRangeKernel(queue(), *kernel_, static_cast<cl_uint>(global.size()),
-                                      nullptr, global.data(), local.data(),
-                                      0, nullptr, event));
-  }
-
-  // As above, but with an event waiting list
-  void Launch(const Queue &queue, const std::vector<size_t> &global,
-              const std::vector<size_t> &local, EventPointer event,
-              std::vector<Event>& waitForEvents) {
-    if (waitForEvents.size() == 0) { return Launch(queue, global, local, event); }
-
-    // Builds a plain version of the events waiting list
-    auto waitForEventsPlain = std::vector<cl_event>();
-    for (auto &waitEvent : waitForEvents) {
-      waitForEventsPlain.push_back(waitEvent());
-    }
-
-    // Launches the kernel while waiting for other events
-    CheckError(clEnqueueNDRangeKernel(queue(), *kernel_, static_cast<cl_uint>(global.size()),
-                                      nullptr, global.data(), local.data(),
-                                      static_cast<cl_uint>(waitForEventsPlain.size()),
-                                      waitForEventsPlain.data(),
-                                      event));
-  }
-
-  // As above, but with the default local workgroup size
-  void Launch(const Queue &queue, const std::vector<size_t> &global, EventPointer event) {
-    CheckError(clEnqueueNDRangeKernel(queue(), *kernel_, static_cast<cl_uint>(global.size()),
-                                      nullptr, global.data(), nullptr,
-                                      0, nullptr, event));
-  }
-
-  // Accessor to the private data-member
-  const cl_kernel& operator()() const { return *kernel_; }
- private:
-  std::shared_ptr<cl_kernel> kernel_;
-
-  // Internal implementation for the recursive SetArguments function.
-  template <typename T>
-  void SetArgumentsRecursive(const size_t index, T &first) {
-    SetArgument(index, first);
-  }
-  template <typename T, typename... Args>
-  void SetArgumentsRecursive(const size_t index, T &first, Args&... args) {
-    SetArgument(index, first);
-    SetArgumentsRecursive(index+1, args...);
-  }
-};
-
-// =================================================================================================
-} // namespace clblast
-
-// CLBLAST_CLPP11_H_
-#endif
diff --git a/include/internal/database.h b/include/internal/database.h
deleted file mode 100644
index f93eaa22..00000000
--- a/include/internal/database.h
+++ /dev/null
@@ -1,104 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-//   Cedric Nugteren <www.cedricnugteren.nl>
-//
-// This file implements the Database class, providing a static variable holding the actual database
-// information. The class also provides utility functions to search the database and to access a
-// found entry by parameter-key. The database itself is filled in the corresponding source-file and
-// partially also by the database/xxxxx.h files, in which kernel-specific parameters are found.
-//
-// =================================================================================================
-
-#ifndef CLBLAST_DATABASE_H_
-#define CLBLAST_DATABASE_H_
-
-#include <string>
-#include <vector>
-#include <unordered_map>
-
-#include "internal/utilities.h"
-
-namespace clblast {
-// =================================================================================================
-
-// See comment at top of file for a description of the class
-class Database {
- public:
-
-  // Type alias for the database parameters
-  using Parameters = std::unordered_map<std::string,size_t>;
-
-  // Structures for content inside the database
-  struct DatabaseDevice {
-    const std::string name;
-    const Parameters parameters;
-  };
-  struct DatabaseVendor {
-    const std::string type;
-    const std::string name;
-    const std::vector<DatabaseDevice> devices;
-  };
-  struct DatabaseEntry {
-    const std::string kernel;
-    const Precision precision;
-    const std::vector<DatabaseVendor> vendors;
-  };
-
-  // The OpenCL device types
-  static constexpr auto kDeviceTypeCPU = "CPU";
-  static constexpr auto kDeviceTypeGPU = "GPU";
-  static constexpr auto kDeviceTypeAccelerator = "accelerator";
-  static constexpr auto kDeviceTypeAll = "default";
-
-  // The OpenCL device vendors
-  static constexpr auto kDeviceVendorAll = "default";
-
-  // Alternative names for some OpenCL vendors
-  const std::unordered_map<std::string,std::string> kVendorNames {
-    {"Intel(R) Corporation", "Intel"},
-    {"GenuineIntel", "Intel"},
-    {"Advanced Micro Devices, Inc.", "AMD"},
-    {"NVIDIA Corporation", "NVIDIA"},
-  };
-
-  // The database consists of separate database entries, stored together in a vector
-  static const DatabaseEntry XaxpyHalf, XaxpySingle, XaxpyDouble, XaxpyComplexSingle, XaxpyComplexDouble;
-  static const DatabaseEntry XdotHalf, XdotSingle, XdotDouble, XdotComplexSingle, XdotComplexDouble;
-  static const DatabaseEntry XgemvHalf, XgemvSingle, XgemvDouble, XgemvComplexSingle, XgemvComplexDouble;
-  static const DatabaseEntry XgerHalf, XgerSingle, XgerDouble, XgerComplexSingle, XgerComplexDouble;
-  static const DatabaseEntry XgemmHalf, XgemmSingle, XgemmDouble, XgemmComplexSingle, XgemmComplexDouble;
-  static const DatabaseEntry CopyHalf, CopySingle, CopyDouble, CopyComplexSingle, CopyComplexDouble;
-  static const DatabaseEntry PadHalf, PadSingle, PadDouble, PadComplexSingle, PadComplexDouble;
-  static const DatabaseEntry TransposeHalf, TransposeSingle, TransposeDouble, TransposeComplexSingle, TransposeComplexDouble;
-  static const DatabaseEntry PadtransposeHalf, PadtransposeSingle, PadtransposeDouble, PadtransposeComplexSingle, PadtransposeComplexDouble;
-  static const std::vector<DatabaseEntry> database;
-
-  // The constructor
-  explicit Database(const Queue &queue, const std::vector<std::string> &routines,
-                    const Precision precision);
-
-  // Accessor of values by key
-  size_t operator[](const std::string key) const { return parameters_.find(key)->second; }
-
-  // Obtain a list of OpenCL pre-processor defines based on the parameters
-  std::string GetDefines() const;
-
- private:
-  Parameters Search(const std::string &this_kernel, const std::string &this_type,
-                    const std::string &this_vendor, const std::string &this_device,
-                    const Precision this_precision) const;
-
-  // Found parameters suitable for this device/kernel
-  Parameters parameters_;
-};
-
-// =================================================================================================
-} // namespace clblast
-
-// CLBLAST_DATABASE_H_
-#endif
diff --git a/include/internal/database/copy.h b/include/internal/database/copy.h
deleted file mode 100644
index 201e8b8a..00000000
--- a/include/internal/database/copy.h
+++ /dev/null
@@ -1,262 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-//   Database generator <database.py>
-//
-// This file populates the database with best-found tuning parameters for the 'Copy' kernels.
-//
-// =================================================================================================
-
-namespace clblast {
-// =================================================================================================
-
-const Database::DatabaseEntry Database::CopyHalf = {
-  "Copy", Precision::kHalf, {
-    { // Intel GPUs
-      kDeviceTypeGPU, "Intel", {
-        { "Intel(R) HD Graphics Skylake ULT GT2",            { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",1} } },
-        { "default",                                         { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",1} } },
-      }
-    },
-    { // Default
-      kDeviceTypeAll, "default", {
-        { "default",                                         { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",1} } },
-      }
-    },
-  }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry Database::CopySingle = {
-  "Copy", Precision::kSingle, {
-    { // AMD GPUs
-      kDeviceTypeGPU, "AMD", {
-        { "AMD Radeon R9 M370X Compute Engine",              { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",1} } },
-        { "Hawaii",                                          { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",2} } },
-        { "Pitcairn",                                        { {"COPY_DIMX",8}, {"COPY_DIMY",16}, {"COPY_VW",4}, {"COPY_WPT",1} } },
-        { "Tahiti",                                          { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",2} } },
-        { "default",                                         { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } },
-      }
-    },
-    { // ARM GPUs
-      kDeviceTypeGPU, "ARM", {
-        { "Mali-T628",                                       { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",4} } },
-        { "default",                                         { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",4} } },
-      }
-    },
-    { // Intel CPUs
-      kDeviceTypeCPU, "Intel", {
-        { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz",        { {"COPY_DIMX",32}, {"COPY_DIMY",16}, {"COPY_VW",8}, {"COPY_WPT",2} } },
-        { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz",         { {"COPY_DIMX",32}, {"COPY_DIMY",16}, {"COPY_VW",8}, {"COPY_WPT",1} } },
-        { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz",        { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",8}, {"COPY_WPT",1} } },
-        { "default",                                         { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",8}, {"COPY_WPT",1} } },
-      }
-    },
-    { // Intel GPUs
-      kDeviceTypeGPU, "Intel", {
-        { "Intel(R) HD Graphics Skylake ULT GT2",            { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } },
-        { "Iris",                                            { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } },
-        { "Iris Pro",                                        { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",4} } },
-        { "default",                                         { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
-      }
-    },
-    { // Intel accelerators
-      kDeviceTypeAccelerator, "Intel", {
-        { "Intel(R) Many Integrated Core Acceleration Card", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",8}, {"COPY_WPT",1} } },
-        { "default",                                         { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",8}, {"COPY_WPT",1} } },
-      }
-    },
-    { // NVIDIA GPUs
-      kDeviceTypeGPU, "NVIDIA", {
-        { "GRID K520",                                       { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",1} } },
-        { "GeForce GTX 480",                                 { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",1} } },
-        { "GeForce GTX 680",                                 { {"COPY_DIMX",32}, {"COPY_DIMY",16}, {"COPY_VW",4}, {"COPY_WPT",1} } },
-        { "GeForce GTX 750 Ti",                              { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } },
-        { "GeForce GTX 980",                                 { {"COPY_DIMX",32}, {"COPY_DIMY",16}, {"COPY_VW",1}, {"COPY_WPT",1} } },
-        { "GeForce GTX TITAN",                               { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",4} } },
-        { "GeForce GTX TITAN X",                             { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } },
-        { "Tesla K20m",                                      { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",4} } },
-        { "Tesla K40m",                                      { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",2} } },
-        { "default",                                         { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
-      }
-    },
-    { // Default
-      kDeviceTypeAll, "default", {
-        { "default",                                         { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
-      }
-    },
-  }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry Database::CopyComplexSingle = {
-  "Copy", Precision::kComplexSingle, {
-    { // AMD GPUs
-      kDeviceTypeGPU, "AMD", {
-        { "AMD Radeon R9 M370X Compute Engine",              { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
-        { "Hawaii",                                          { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } },
-        { "Pitcairn",                                        { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } },
-        { "Tahiti",                                          { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",2} } },
-        { "default",                                         { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
-      }
-    },
-    { // Intel CPUs
-      kDeviceTypeCPU, "Intel", {
-        { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz",        { {"COPY_DIMX",16}, {"COPY_DIMY",16}, {"COPY_VW",8}, {"COPY_WPT",1} } },
-        { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz",         { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",2} } },
-        { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz",        { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",8}, {"COPY_WPT",1} } },
-        { "default",                                         { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } },
-      }
-    },
-    { // Intel GPUs
-      kDeviceTypeGPU, "Intel", {
-        { "Intel(R) HD Graphics Skylake ULT GT2",            { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",4} } },
-        { "Iris",                                            { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } },
-        { "Iris Pro",                                        { {"COPY_DIMX",32}, {"COPY_DIMY",16}, {"COPY_VW",1}, {"COPY_WPT",4} } },
-        { "default",                                         { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } },
-      }
-    },
-    { // Intel accelerators
-      kDeviceTypeAccelerator, "Intel", {
-        { "Intel(R) Many Integrated Core Acceleration Card", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",1} } },
-        { "default",                                         { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",1} } },
-      }
-    },
-    { // NVIDIA GPUs
-      kDeviceTypeGPU, "NVIDIA", {
-        { "GRID K520",                                       { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
-        { "GeForce GTX 480",                                 { {"COPY_DIMX",16}, {"COPY_DIMY",16}, {"COPY_VW",1}, {"COPY_WPT",1} } },
-        { "GeForce GTX 750 Ti",                              { {"COPY_DIMX",32}, {"COPY_DIMY",16}, {"COPY_VW",1}, {"COPY_WPT",1} } },
-        { "GeForce GTX 980",                                 { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
-        { "GeForce GTX TITAN X",                             { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
-        { "Tesla K20m",                                      { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",4} } },
-        { "Tesla K40m",                                      { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
-        { "default",                                         { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
-      }
-    },
-    { // Default
-      kDeviceTypeAll, "default", {
-        { "default",                                         { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
-      }
-    },
-  }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry Database::CopyDouble = {
-  "Copy", Precision::kDouble, {
-    { // AMD GPUs
-      kDeviceTypeGPU, "AMD", {
-        { "AMD Radeon R9 M370X Compute Engine",              { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
-        { "Hawaii",                                          { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } },
-        { "Pitcairn",                                        { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
-        { "Tahiti",                                          { {"COPY_DIMX",8}, {"COPY_DIMY",32}, {"COPY_VW",2}, {"COPY_WPT",1} } },
-        { "default",                                         { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
-      }
-    },
-    { // ARM GPUs
-      kDeviceTypeGPU, "ARM", {
-        { "Mali-T628",                                       { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",8}, {"COPY_WPT",2} } },
-        { "default",                                         { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",8}, {"COPY_WPT",2} } },
-      }
-    },
-    { // Intel CPUs
-      kDeviceTypeCPU, "Intel", {
-        { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz",        { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",8}, {"COPY_WPT",1} } },
-        { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz",         { {"COPY_DIMX",16}, {"COPY_DIMY",32}, {"COPY_VW",2}, {"COPY_WPT",1} } },
-        { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz",        { {"COPY_DIMX",16}, {"COPY_DIMY",16}, {"COPY_VW",8}, {"COPY_WPT",1} } },
-        { "default",                                         { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } },
-      }
-    },
-    { // Intel accelerators
-      kDeviceTypeAccelerator, "Intel", {
-        { "Intel(R) Many Integrated Core Acceleration Card", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",8}, {"COPY_WPT",1} } },
-        { "default",                                         { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",8}, {"COPY_WPT",1} } },
-      }
-    },
-    { // NVIDIA GPUs
-      kDeviceTypeGPU, "NVIDIA", {
-        { "GRID K520",                                       { {"COPY_DIMX",32}, {"COPY_DIMY",16}, {"COPY_VW",2}, {"COPY_WPT",1} } },
-        { "GeForce GTX 480",                                 { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } },
-        { "GeForce GTX 680",                                 { {"COPY_DIMX",16}, {"COPY_DIMY",32}, {"COPY_VW",2}, {"COPY_WPT",1} } },
-        { "GeForce GTX 750 Ti",                              { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } },
-        { "GeForce GTX 980",                                 { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } },
-        { "GeForce GTX TITAN",                               { {"COPY_DIMX",16}, {"COPY_DIMY",32}, {"COPY_VW",2}, {"COPY_WPT",2} } },
-        { "GeForce GTX TITAN X",                             { {"COPY_DIMX",32}, {"COPY_DIMY",16}, {"COPY_VW",1}, {"COPY_WPT",1} } },
-        { "Tesla K20m",                                      { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } },
-        { "Tesla K40m",                                      { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",2} } },
-        { "default",                                         { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
-      }
-    },
-    { // Default
-      kDeviceTypeAll, "default", {
-        { "default",                                         { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
-      }
-    },
-  }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry Database::CopyComplexDouble = {
-  "Copy", Precision::kComplexDouble, {
-    { // AMD GPUs
-      kDeviceTypeGPU, "AMD", {
-        { "AMD Radeon R9 M370X Compute Engine",              { {"COPY_DIMX",8}, {"COPY_DIMY",16}, {"COPY_VW",1}, {"COPY_WPT",1} } },
-        { "Hawaii",                                          { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",8} } },
-        { "Pitcairn",                                        { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
-        { "Tahiti",                                          { {"COPY_DIMX",8}, {"COPY_DIMY",16}, {"COPY_VW",1}, {"COPY_WPT",1} } },
-        { "default",                                         { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
-      }
-    },
-    { // ARM GPUs
-      kDeviceTypeGPU, "ARM", {
-        { "Mali-T628",                                       { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } },
-        { "default",                                         { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } },
-      }
-    },
-    { // Intel CPUs
-      kDeviceTypeCPU, "Intel", {
-        { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz",        { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",8}, {"COPY_WPT",1} } },
-        { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz",         { {"COPY_DIMX",32}, {"COPY_DIMY",32}, {"COPY_VW",8}, {"COPY_WPT",1} } },
-        { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz",        { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",8}, {"COPY_WPT",1} } },
-        { "default",                                         { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",8}, {"COPY_WPT",1} } },
-      }
-    },
-    { // Intel accelerators
-      kDeviceTypeAccelerator, "Intel", {
-        { "Intel(R) Many Integrated Core Acceleration Card", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",8}, {"COPY_WPT",1} } },
-        { "default",                                         { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",8}, {"COPY_WPT",1} } },
-      }
-    },
-    { // NVIDIA GPUs
-      kDeviceTypeGPU, "NVIDIA", {
-        { "GRID K520",                                       { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
-        { "GeForce GTX 480",                                 { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
-        { "GeForce GTX 680",                                 { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
-        { "GeForce GTX 750 Ti",                              { {"COPY_DIMX",32}, {"COPY_DIMY",16}, {"COPY_VW",1}, {"COPY_WPT",1} } },
-        { "GeForce GTX 980",                                 { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
-        { "GeForce GTX TITAN",                               { {"COPY_DIMX",16}, {"COPY_DIMY",16}, {"COPY_VW",1}, {"COPY_WPT",1} } },
-        { "GeForce GTX TITAN X",                             { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
-        { "Tesla K20m",                                      { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } },
-        { "Tesla K40m",                                      { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
-        { "default",                                         { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
-      }
-    },
-    { // Default
-      kDeviceTypeAll, "default", {
-        { "default",                                         { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
-      }
-    },
-  }
-};
-
-// =================================================================================================
-} // namespace clblast
diff --git a/include/internal/database/pad.h b/include/internal/database/pad.h
deleted file mode 100644
index cc703dd6..00000000
--- a/include/internal/database/pad.h
+++ /dev/null
@@ -1,270 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-//   Database generator <database.py>
-//
-// This file populates the database with best-found tuning parameters for the 'Pad' kernels.
-//
-// =================================================================================================
-
-namespace clblast {
-// =================================================================================================
-
-const Database::DatabaseEntry Database::PadHalf = {
-  "Pad", Precision::kHalf, {
-    { // Intel GPUs
-      kDeviceTypeGPU, "Intel", {
-        { "Intel(R) HD Graphics Skylake ULT GT2",            { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
-        { "default",                                         { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
-      }
-    },
-    { // Default
-      kDeviceTypeAll, "default", {
-        { "default",                                         { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
-      }
-    },
-  }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry Database::PadSingle = {
-  "Pad", Precision::kSingle, {
-    { // AMD GPUs
-      kDeviceTypeGPU, "AMD", {
-        { "AMD Radeon R9 M370X Compute Engine",              { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
-        { "Hawaii",                                          { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",4} } },
-        { "Pitcairn",                                        { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } },
-        { "Tahiti",                                          { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } },
-        { "default",                                         { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
-      }
-    },
-    { // ARM GPUs
-      kDeviceTypeGPU, "ARM", {
-        { "Mali-T628",                                       { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",4} } },
-        { "default",                                         { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",4} } },
-      }
-    },
-    { // Intel CPUs
-      kDeviceTypeCPU, "Intel", {
-        { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz",        { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } },
-        { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz",         { {"PAD_DIMX",16}, {"PAD_DIMY",32}, {"PAD_WPTX",4}, {"PAD_WPTY",4} } },
-        { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz",        { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } },
-        { "default",                                         { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } },
-      }
-    },
-    { // Intel GPUs
-      kDeviceTypeGPU, "Intel", {
-        { "Intel(R) HD Graphics Skylake ULT GT2",            { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
-        { "Iris",                                            { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
-        { "Iris Pro",                                        { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
-        { "default",                                         { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
-      }
-    },
-    { // Intel accelerators
-      kDeviceTypeAccelerator, "Intel", {
-        { "Intel(R) Many Integrated Core Acceleration Card", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
-        { "default",                                         { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
-      }
-    },
-    { // NVIDIA GPUs
-      kDeviceTypeGPU, "NVIDIA", {
-        { "GRID K520",                                       { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
-        { "GeForce GTX 480",                                 { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",4} } },
-        { "GeForce GTX 680",                                 { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } },
-        { "GeForce GTX 750 Ti",                              { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } },
-        { "GeForce GTX 980",                                 { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
-        { "GeForce GTX TITAN",                               { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
-        { "GeForce GTX TITAN X",                             { {"PAD_DIMX",16}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
-        { "Tesla K20m",                                      { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
-        { "Tesla K40m",                                      { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
-        { "default",                                         { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
-      }
-    },
-    { // Default
-      kDeviceTypeAll, "default", {
-        { "default",                                         { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
-      }
-    },
-  }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry Database::PadComplexSingle = {
-  "Pad", Precision::kComplexSingle, {
-    { // AMD GPUs
-      kDeviceTypeGPU, "AMD", {
-        { "AMD Radeon R9 M370X Compute Engine",              { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
-        { "Hawaii",                                          { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } },
-        { "Pitcairn",                                        { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } },
-        { "Tahiti",                                          { {"PAD_DIMX",16}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
-        { "default",                                         { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
-      }
-    },
-    { // ARM GPUs
-      kDeviceTypeGPU, "ARM", {
-        { "Mali-T628",                                       { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",4} } },
-        { "default",                                         { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",4} } },
-      }
-    },
-    { // Intel CPUs
-      kDeviceTypeCPU, "Intel", {
-        { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz",        { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",2} } },
-        { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz",         { {"PAD_DIMX",32}, {"PAD_DIMY",32}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } },
-        { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz",        { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } },
-        { "default",                                         { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
-      }
-    },
-    { // Intel GPUs
-      kDeviceTypeGPU, "Intel", {
-        { "Intel(R) HD Graphics Skylake ULT GT2",            { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",4} } },
-        { "Iris",                                            { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",2}, {"PAD_WPTY",4} } },
-        { "Iris Pro",                                        { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
-        { "default",                                         { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
-      }
-    },
-    { // Intel accelerators
-      kDeviceTypeAccelerator, "Intel", {
-        { "Intel(R) Many Integrated Core Acceleration Card", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
-        { "default",                                         { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
-      }
-    },
-    { // NVIDIA GPUs
-      kDeviceTypeGPU, "NVIDIA", {
-        { "GRID K520",                                       { {"PAD_DIMX",16}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
-        { "GeForce GTX 480",                                 { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
-        { "GeForce GTX 680",                                 { {"PAD_DIMX",16}, {"PAD_DIMY",32}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } },
-        { "GeForce GTX 750 Ti",                              { {"PAD_DIMX",16}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
-        { "GeForce GTX 980",                                 { {"PAD_DIMX",16}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
-        { "GeForce GTX TITAN",                               { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
-        { "GeForce GTX TITAN X",                             { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
-        { "Tesla K20m",                                      { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } },
-        { "Tesla K40m",                                      { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
-        { "default",                                         { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
-      }
-    },
-    { // Default
-      kDeviceTypeAll, "default", {
-        { "default",                                         { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
-      }
-    },
-  }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry Database::PadDouble = {
-  "Pad", Precision::kDouble, {
-    { // AMD GPUs
-      kDeviceTypeGPU, "AMD", {
-        { "AMD Radeon R9 M370X Compute Engine",              { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
-        { "Hawaii",                                          { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } },
-        { "Pitcairn",                                        { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } },
-        { "Tahiti",                                          { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
-        { "default",                                         { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
-      }
-    },
-    { // ARM GPUs
-      kDeviceTypeGPU, "ARM", {
-        { "Mali-T628",                                       { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",4}, {"PAD_WPTY",2} } },
-        { "default",                                         { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",4}, {"PAD_WPTY",2} } },
-      }
-    },
-    { // Intel CPUs
-      kDeviceTypeCPU, "Intel", {
-        { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz",        { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } },
-        { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz",         { {"PAD_DIMX",32}, {"PAD_DIMY",32}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } },
-        { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz",        { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
-        { "default",                                         { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
-      }
-    },
-    { // Intel accelerators
-      kDeviceTypeAccelerator, "Intel", {
-        { "Intel(R) Many Integrated Core Acceleration Card", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
-        { "default",                                         { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
-      }
-    },
-    { // NVIDIA GPUs
-      kDeviceTypeGPU, "NVIDIA", {
-        { "GRID K520",                                       { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
-        { "GeForce GTX 480",                                 { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
-        { "GeForce GTX 680",                                 { {"PAD_DIMX",32}, {"PAD_DIMY",32}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } },
-        { "GeForce GTX 750 Ti",                              { {"PAD_DIMX",8}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
-        { "GeForce GTX 980",                                 { {"PAD_DIMX",8}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
-        { "GeForce GTX TITAN",                               { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
-        { "GeForce GTX TITAN X",                             { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
-        { "Tesla K20m",                                      { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
-        { "Tesla K40m",                                      { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } },
-        { "default",                                         { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
-      }
-    },
-    { // Default
-      kDeviceTypeAll, "default", {
-        { "default",                                         { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
-      }
-    },
-  }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry Database::PadComplexDouble = {
-  "Pad", Precision::kComplexDouble, {
-    { // AMD GPUs
-      kDeviceTypeGPU, "AMD", {
-        { "AMD Radeon R9 M370X Compute Engine",              { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
-        { "Hawaii",                                          { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
-        { "Pitcairn",                                        { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
-        { "Tahiti",                                          { {"PAD_DIMX",8}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
-        { "default",                                         { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
-      }
-    },
-    { // ARM GPUs
-      kDeviceTypeGPU, "ARM", {
-        { "Mali-T628",                                       { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } },
-        { "default",                                         { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } },
-      }
-    },
-    { // Intel CPUs
-      kDeviceTypeCPU, "Intel", {
-        { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz",        { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
-        { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz",         { {"PAD_DIMX",16}, {"PAD_DIMY",32}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } },
-        { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz",        { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
-        { "default",                                         { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
-      }
-    },
-    { // Intel accelerators
-      kDeviceTypeAccelerator, "Intel", {
-        { "Intel(R) Many Integrated Core Acceleration Card", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } },
-        { "default",                                         { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } },
-      }
-    },
-    { // NVIDIA GPUs
-      kDeviceTypeGPU, "NVIDIA", {
-        { "GRID K520",                                       { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
-        { "GeForce GTX 480",                                 { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
-        { "GeForce GTX 680",                                 { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
-        { "GeForce GTX 750 Ti",                              { {"PAD_DIMX",32}, {"PAD_DIMY",32}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
-        { "GeForce GTX 980",                                 { {"PAD_DIMX",16}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
-        { "GeForce GTX TITAN",                               { {"PAD_DIMX",8}, {"PAD_DIMY",32}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } },
-        { "GeForce GTX TITAN X",                             { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
-        { "Tesla K20m",                                      { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } },
-        { "Tesla K40m",                                      { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
-        { "default",                                         { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
-      }
-    },
-    { // Default
-      kDeviceTypeAll, "default", {
-        { "default",                                         { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
-      }
-    },
-  }
-};
-
-// =================================================================================================
-} // namespace clblast
diff --git a/include/internal/database/padtranspose.h b/include/internal/database/padtranspose.h
deleted file mode 100644
index f3b1f262..00000000
--- a/include/internal/database/padtranspose.h
+++ /dev/null
@@ -1,270 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-//   Database generator <database.py>
-//
-// This file populates the database with best-found tuning parameters for the 'Padtranspose' kernels.
-//
-// =================================================================================================
-
-namespace clblast {
-// =================================================================================================
-
-const Database::DatabaseEntry Database::PadtransposeHalf = {
-  "Padtranspose", Precision::kHalf, {
-    { // Intel GPUs
-      kDeviceTypeGPU, "Intel", {
-        { "Intel(R) HD Graphics Skylake ULT GT2",            { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",1} } },
-        { "default",                                         { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",1} } },
-      }
-    },
-    { // Default
-      kDeviceTypeAll, "default", {
-        { "default",                                         { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",1} } },
-      }
-    },
-  }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry Database::PadtransposeSingle = {
-  "Padtranspose", Precision::kSingle, {
-    { // AMD GPUs
-      kDeviceTypeGPU, "AMD", {
-        { "AMD Radeon R9 M370X Compute Engine",              { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",4} } },
-        { "Hawaii",                                          { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",4} } },
-        { "Pitcairn",                                        { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",4} } },
-        { "Tahiti",                                          { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",4} } },
-        { "default",                                         { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",4} } },
-      }
-    },
-    { // ARM GPUs
-      kDeviceTypeGPU, "ARM", {
-        { "Mali-T628",                                       { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",2} } },
-        { "default",                                         { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",2} } },
-      }
-    },
-    { // Intel CPUs
-      kDeviceTypeCPU, "Intel", {
-        { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz",        { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
-        { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz",         { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",8} } },
-        { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz",        { {"PADTRA_PAD",0}, {"PADTRA_TILE",32}, {"PADTRA_WPT",1} } },
-        { "default",                                         { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",1} } },
-      }
-    },
-    { // Intel GPUs
-      kDeviceTypeGPU, "Intel", {
-        { "Intel(R) HD Graphics Skylake ULT GT2",            { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } },
-        { "Iris",                                            { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } },
-        { "Iris Pro",                                        { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } },
-        { "default",                                         { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } },
-      }
-    },
-    { // Intel accelerators
-      kDeviceTypeAccelerator, "Intel", {
-        { "Intel(R) Many Integrated Core Acceleration Card", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } },
-        { "default",                                         { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } },
-      }
-    },
-    { // NVIDIA GPUs
-      kDeviceTypeGPU, "NVIDIA", {
-        { "GRID K520",                                       { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",2} } },
-        { "GeForce GTX 480",                                 { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } },
-        { "GeForce GTX 680",                                 { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } },
-        { "GeForce GTX 750 Ti",                              { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",2} } },
-        { "GeForce GTX 980",                                 { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
-        { "GeForce GTX TITAN",                               { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } },
-        { "GeForce GTX TITAN X",                             { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",1} } },
-        { "Tesla K20m",                                      { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } },
-        { "Tesla K40m",                                      { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",2} } },
-        { "default",                                         { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
-      }
-    },
-    { // Default
-      kDeviceTypeAll, "default", {
-        { "default",                                         { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",1} } },
-      }
-    },
-  }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry Database::PadtransposeComplexSingle = {
-  "Padtranspose", Precision::kComplexSingle, {
-    { // AMD GPUs
-      kDeviceTypeGPU, "AMD", {
-        { "AMD Radeon R9 M370X Compute Engine",              { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",4} } },
-        { "Hawaii",                                          { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } },
-        { "Pitcairn",                                        { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } },
-        { "Tahiti",                                          { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } },
-        { "default",                                         { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",2} } },
-      }
-    },
-    { // ARM GPUs
-      kDeviceTypeGPU, "ARM", {
-        { "Mali-T628",                                       { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } },
-        { "default",                                         { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } },
-      }
-    },
-    { // Intel CPUs
-      kDeviceTypeCPU, "Intel", {
-        { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz",        { {"PADTRA_PAD",1}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } },
-        { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz",         { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",8} } },
-        { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz",        { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } },
-        { "default",                                         { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } },
-      }
-    },
-    { // Intel GPUs
-      kDeviceTypeGPU, "Intel", {
-        { "Intel(R) HD Graphics Skylake ULT GT2",            { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",4} } },
-        { "Iris",                                            { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } },
-        { "Iris Pro",                                        { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } },
-        { "default",                                         { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } },
-      }
-    },
-    { // Intel accelerators
-      kDeviceTypeAccelerator, "Intel", {
-        { "Intel(R) Many Integrated Core Acceleration Card", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
-        { "default",                                         { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
-      }
-    },
-    { // NVIDIA GPUs
-      kDeviceTypeGPU, "NVIDIA", {
-        { "GRID K520",                                       { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
-        { "GeForce GTX 480",                                 { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
-        { "GeForce GTX 680",                                 { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
-        { "GeForce GTX 750 Ti",                              { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
-        { "GeForce GTX 980",                                 { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
-        { "GeForce GTX TITAN",                               { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
-        { "GeForce GTX TITAN X",                             { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",1} } },
-        { "Tesla K20m",                                      { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
-        { "Tesla K40m",                                      { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
-        { "default",                                         { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
-      }
-    },
-    { // Default
-      kDeviceTypeAll, "default", {
-        { "default",                                         { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",1} } },
-      }
-    },
-  }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry Database::PadtransposeDouble = {
-  "Padtranspose", Precision::kDouble, {
-    { // AMD GPUs
-      kDeviceTypeGPU, "AMD", {
-        { "AMD Radeon R9 M370X Compute Engine",              { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",4} } },
-        { "Hawaii",                                          { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } },
-        { "Pitcairn",                                        { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } },
-        { "Tahiti",                                          { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } },
-        { "default",                                         { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",2} } },
-      }
-    },
-    { // ARM GPUs
-      kDeviceTypeGPU, "ARM", {
-        { "Mali-T628",                                       { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } },
-        { "default",                                         { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } },
-      }
-    },
-    { // Intel CPUs
-      kDeviceTypeCPU, "Intel", {
-        { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz",        { {"PADTRA_PAD",1}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } },
-        { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz",         { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",8} } },
-        { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz",        { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",1} } },
-        { "default",                                         { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",1} } },
-      }
-    },
-    { // Intel accelerators
-      kDeviceTypeAccelerator, "Intel", {
-        { "Intel(R) Many Integrated Core Acceleration Card", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
-        { "default",                                         { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
-      }
-    },
-    { // NVIDIA GPUs
-      kDeviceTypeGPU, "NVIDIA", {
-        { "GRID K520",                                       { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
-        { "GeForce GTX 480",                                 { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
-        { "GeForce GTX 680",                                 { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
-        { "GeForce GTX 750 Ti",                              { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",2} } },
-        { "GeForce GTX 980",                                 { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",1} } },
-        { "GeForce GTX TITAN",                               { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
-        { "GeForce GTX TITAN X",                             { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",1} } },
-        { "Tesla K20m",                                      { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
-        { "Tesla K40m",                                      { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
-        { "default",                                         { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
-      }
-    },
-    { // Default
-      kDeviceTypeAll, "default", {
-        { "default",                                         { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",1} } },
-      }
-    },
-  }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry Database::PadtransposeComplexDouble = {
-  "Padtranspose", Precision::kComplexDouble, {
-    { // AMD GPUs
-      kDeviceTypeGPU, "AMD", {
-        { "AMD Radeon R9 M370X Compute Engine",              { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } },
-        { "Hawaii",                                          { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } },
-        { "Pitcairn",                                        { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } },
-        { "Tahiti",                                          { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",2} } },
-        { "default",                                         { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",2} } },
-      }
-    },
-    { // ARM GPUs
-      kDeviceTypeGPU, "ARM", {
-        { "Mali-T628",                                       { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",1} } },
-        { "default",                                         { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",1} } },
-      }
-    },
-    { // Intel CPUs
-      kDeviceTypeCPU, "Intel", {
-        { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz",        { {"PADTRA_PAD",1}, {"PADTRA_TILE",8}, {"PADTRA_WPT",2} } },
-        { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz",         { {"PADTRA_PAD",1}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } },
-        { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz",        { {"PADTRA_PAD",1}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } },
-        { "default",                                         { {"PADTRA_PAD",1}, {"PADTRA_TILE",8}, {"PADTRA_WPT",2} } },
-      }
-    },
-    { // Intel accelerators
-      kDeviceTypeAccelerator, "Intel", {
-        { "Intel(R) Many Integrated Core Acceleration Card", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
-        { "default",                                         { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
-      }
-    },
-    { // NVIDIA GPUs
-      kDeviceTypeGPU, "NVIDIA", {
-        { "GRID K520",                                       { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
-        { "GeForce GTX 480",                                 { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
-        { "GeForce GTX 680",                                 { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",1} } },
-        { "GeForce GTX 750 Ti",                              { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } },
-        { "GeForce GTX 980",                                 { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
-        { "GeForce GTX TITAN",                               { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
-        { "GeForce GTX TITAN X",                             { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",1} } },
-        { "Tesla K20m",                                      { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
-        { "Tesla K40m",                                      { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
-        { "default",                                         { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
-      }
-    },
-    { // Default
-      kDeviceTypeAll, "default", {
-        { "default",                                         { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",1} } },
-      }
-    },
-  }
-};
-
-// =================================================================================================
-} // namespace clblast
diff --git a/include/internal/database/transpose.h b/include/internal/database/transpose.h
deleted file mode 100644
index 0c893dae..00000000
--- a/include/internal/database/transpose.h
+++ /dev/null
@@ -1,258 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-//   Database generator <database.py>
-//
-// This file populates the database with best-found tuning parameters for the 'Transpose' kernels.
-//
-// =================================================================================================
-
-namespace clblast {
-// =================================================================================================
-
-const Database::DatabaseEntry Database::TransposeHalf = {
-  "Transpose", Precision::kHalf, {
-    { // Intel GPUs
-      kDeviceTypeGPU, "Intel", {
-        { "Intel(R) HD Graphics Skylake ULT GT2",            { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } },
-        { "default",                                         { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } },
-      }
-    },
-    { // Default
-      kDeviceTypeAll, "default", {
-        { "default",                                         { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } },
-      }
-    },
-  }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry Database::TransposeSingle = {
-  "Transpose", Precision::kSingle, {
-    { // AMD GPUs
-      kDeviceTypeGPU, "AMD", {
-        { "AMD Radeon R9 M370X Compute Engine",              { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",8} } },
-        { "Hawaii",                                          { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",8} } },
-        { "Pitcairn",                                        { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } },
-        { "Tahiti",                                          { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } },
-        { "default",                                         { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } },
-      }
-    },
-    { // ARM GPUs
-      kDeviceTypeGPU, "ARM", {
-        { "Mali-T628",                                       { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } },
-        { "default",                                         { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } },
-      }
-    },
-    { // Intel CPUs
-      kDeviceTypeCPU, "Intel", {
-        { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz",        { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",8} } },
-        { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz",         { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",8} } },
-        { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz",        { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",8} } },
-        { "default",                                         { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",8} } },
-      }
-    },
-    { // Intel GPUs
-      kDeviceTypeGPU, "Intel", {
-        { "Intel(R) HD Graphics Skylake ULT GT2",            { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } },
-        { "Iris",                                            { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } },
-        { "Iris Pro",                                        { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } },
-        { "default",                                         { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } },
-      }
-    },
-    { // Intel accelerators
-      kDeviceTypeAccelerator, "Intel", {
-        { "Intel(R) Many Integrated Core Acceleration Card", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } },
-        { "default",                                         { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } },
-      }
-    },
-    { // NVIDIA GPUs
-      kDeviceTypeGPU, "NVIDIA", {
-        { "GRID K520",                                       { {"TRA_DIM",32}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } },
-        { "GeForce GTX 480",                                 { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } },
-        { "GeForce GTX 680",                                 { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } },
-        { "GeForce GTX 750 Ti",                              { {"TRA_DIM",32}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } },
-        { "GeForce GTX 980",                                 { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
-        { "GeForce GTX TITAN",                               { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } },
-        { "GeForce GTX TITAN X",                             { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } },
-        { "Tesla K20m",                                      { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } },
-        { "Tesla K40m",                                      { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } },
-        { "default",                                         { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
-      }
-    },
-    { // Default
-      kDeviceTypeAll, "default", {
-        { "default",                                         { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
-      }
-    },
-  }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry Database::TransposeComplexSingle = {
-  "Transpose", Precision::kComplexSingle, {
-    { // AMD GPUs
-      kDeviceTypeGPU, "AMD", {
-        { "AMD Radeon R9 M370X Compute Engine",              { {"TRA_DIM",4}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } },
-        { "Hawaii",                                          { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } },
-        { "Pitcairn",                                        { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } },
-        { "Tahiti",                                          { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } },
-        { "default",                                         { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } },
-      }
-    },
-    { // ARM GPUs
-      kDeviceTypeGPU, "ARM", {
-        { "Mali-T628",                                       { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } },
-        { "default",                                         { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } },
-      }
-    },
-    { // Intel CPUs
-      kDeviceTypeCPU, "Intel", {
-        { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz",        { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } },
-        { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz",         { {"TRA_DIM",4}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",8} } },
-        { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz",        { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } },
-        { "default",                                         { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } },
-      }
-    },
-    { // Intel GPUs
-      kDeviceTypeGPU, "Intel", {
-        { "Intel(R) HD Graphics Skylake ULT GT2",            { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } },
-        { "Iris",                                            { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } },
-        { "Iris Pro",                                        { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } },
-        { "default",                                         { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } },
-      }
-    },
-    { // NVIDIA GPUs
-      kDeviceTypeGPU, "NVIDIA", {
-        { "GRID K520",                                       { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } },
-        { "GeForce GTX 480",                                 { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
-        { "GeForce GTX 680",                                 { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } },
-        { "GeForce GTX 750 Ti",                              { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
-        { "GeForce GTX 980",                                 { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
-        { "GeForce GTX TITAN",                               { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
-        { "GeForce GTX TITAN X",                             { {"TRA_DIM",32}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
-        { "Tesla K20m",                                      { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
-        { "Tesla K40m",                                      { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
-        { "default",                                         { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
-      }
-    },
-    { // Default
-      kDeviceTypeAll, "default", {
-        { "default",                                         { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
-      }
-    },
-  }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry Database::TransposeDouble = {
-  "Transpose", Precision::kDouble, {
-    { // AMD GPUs
-      kDeviceTypeGPU, "AMD", {
-        { "AMD Radeon R9 M370X Compute Engine",              { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } },
-        { "Hawaii",                                          { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } },
-        { "Pitcairn",                                        { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } },
-        { "Tahiti",                                          { {"TRA_DIM",4}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } },
-        { "default",                                         { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } },
-      }
-    },
-    { // ARM GPUs
-      kDeviceTypeGPU, "ARM", {
-        { "Mali-T628",                                       { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
-        { "default",                                         { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
-      }
-    },
-    { // Intel CPUs
-      kDeviceTypeCPU, "Intel", {
-        { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz",        { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } },
-        { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz",         { {"TRA_DIM",4}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",8} } },
-        { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz",        { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",8} } },
-        { "default",                                         { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } },
-      }
-    },
-    { // Intel accelerators
-      kDeviceTypeAccelerator, "Intel", {
-        { "Intel(R) Many Integrated Core Acceleration Card", { {"TRA_DIM",32}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
-        { "default",                                         { {"TRA_DIM",32}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
-      }
-    },
-    { // NVIDIA GPUs
-      kDeviceTypeGPU, "NVIDIA", {
-        { "GRID K520",                                       { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } },
-        { "GeForce GTX 480",                                 { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } },
-        { "GeForce GTX 680",                                 { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } },
-        { "GeForce GTX 750 Ti",                              { {"TRA_DIM",32}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
-        { "GeForce GTX 980",                                 { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } },
-        { "GeForce GTX TITAN",                               { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } },
-        { "GeForce GTX TITAN X",                             { {"TRA_DIM",32}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
-        { "Tesla K20m",                                      { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } },
-        { "Tesla K40m",                                      { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } },
-        { "default",                                         { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
-      }
-    },
-    { // Default
-      kDeviceTypeAll, "default", {
-        { "default",                                         { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
-      }
-    },
-  }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry Database::TransposeComplexDouble = {
-  "Transpose", Precision::kComplexDouble, {
-    { // AMD GPUs
-      kDeviceTypeGPU, "AMD", {
-        { "AMD Radeon R9 M370X Compute Engine",              { {"TRA_DIM",4}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } },
-        { "Hawaii",                                          { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } },
-        { "Pitcairn",                                        { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } },
-        { "Tahiti",                                          { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } },
-        { "default",                                         { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } },
-      }
-    },
-    { // ARM GPUs
-      kDeviceTypeGPU, "ARM", {
-        { "Mali-T628",                                       { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
-        { "default",                                         { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
-      }
-    },
-    { // Intel CPUs
-      kDeviceTypeCPU, "Intel", {
-        { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz",        { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } },
-        { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz",         { {"TRA_DIM",4}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } },
-        { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz",        { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } },
-        { "default",                                         { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } },
-      }
-    },
-    { // NVIDIA GPUs
-      kDeviceTypeGPU, "NVIDIA", {
-        { "GRID K520",                                       { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } },
-        { "GeForce GTX 480",                                 { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
-        { "GeForce GTX 680",                                 { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } },
-        { "GeForce GTX 750 Ti",                              { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
-        { "GeForce GTX 980",                                 { {"TRA_DIM",32}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
-        { "GeForce GTX TITAN",                               { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
-        { "GeForce GTX TITAN X",                             { {"TRA_DIM",32}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
-        { "Tesla K20m",                                      { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
-        { "Tesla K40m",                                      { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
-        { "default",                                         { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
-      }
-    },
-    { // Default
-      kDeviceTypeAll, "default", {
-        { "default",                                         { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
-      }
-    },
-  }
-};
-
-// =================================================================================================
-} // namespace clblast
diff --git a/include/internal/database/xaxpy.h b/include/internal/database/xaxpy.h
deleted file mode 100644
index 6e6719e8..00000000
--- a/include/internal/database/xaxpy.h
+++ /dev/null
@@ -1,270 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-//   Database generator <database.py>
-//
-// This file populates the database with best-found tuning parameters for the 'Xaxpy' kernels.
-//
-// =================================================================================================
-
-namespace clblast {
-// =================================================================================================
-
-const Database::DatabaseEntry Database::XaxpyHalf = {
-  "Xaxpy", Precision::kHalf, {
-    { // Intel GPUs
-      kDeviceTypeGPU, "Intel", {
-        { "Intel(R) HD Graphics Skylake ULT GT2",            { {"VW",4}, {"WGS",512}, {"WPT",8} } },
-        { "default",                                         { {"VW",4}, {"WGS",512}, {"WPT",8} } },
-      }
-    },
-    { // Default
-      kDeviceTypeAll, "default", {
-        { "default",                                         { {"VW",4}, {"WGS",512}, {"WPT",8} } },
-      }
-    },
-  }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry Database::XaxpySingle = {
-  "Xaxpy", Precision::kSingle, {
-    { // AMD GPUs
-      kDeviceTypeGPU, "AMD", {
-        { "AMD Radeon R9 M370X Compute Engine",              { {"VW",1}, {"WGS",128}, {"WPT",1} } },
-        { "Hawaii",                                          { {"VW",2}, {"WGS",64}, {"WPT",2} } },
-        { "Pitcairn",                                        { {"VW",2}, {"WGS",128}, {"WPT",1} } },
-        { "Tahiti",                                          { {"VW",2}, {"WGS",64}, {"WPT",1} } },
-        { "default",                                         { {"VW",1}, {"WGS",64}, {"WPT",1} } },
-      }
-    },
-    { // ARM GPUs
-      kDeviceTypeGPU, "ARM", {
-        { "Mali-T628",                                       { {"VW",4}, {"WGS",256}, {"WPT",1} } },
-        { "default",                                         { {"VW",4}, {"WGS",256}, {"WPT",1} } },
-      }
-    },
-    { // Intel CPUs
-      kDeviceTypeCPU, "Intel", {
-        { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz",        { {"VW",1}, {"WGS",512}, {"WPT",1} } },
-        { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz",         { {"VW",4}, {"WGS",256}, {"WPT",1} } },
-        { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz",        { {"VW",1}, {"WGS",128}, {"WPT",1} } },
-        { "default",                                         { {"VW",1}, {"WGS",128}, {"WPT",1} } },
-      }
-    },
-    { // Intel GPUs
-      kDeviceTypeGPU, "Intel", {
-        { "Intel(R) HD Graphics Skylake ULT GT2",            { {"VW",1}, {"WGS",512}, {"WPT",2} } },
-        { "Iris",                                            { {"VW",1}, {"WGS",64}, {"WPT",1} } },
-        { "Iris Pro",                                        { {"VW",1}, {"WGS",128}, {"WPT",2} } },
-        { "default",                                         { {"VW",1}, {"WGS",64}, {"WPT",1} } },
-      }
-    },
-    { // Intel accelerators
-      kDeviceTypeAccelerator, "Intel", {
-        { "Intel(R) Many Integrated Core Acceleration Card", { {"VW",2}, {"WGS",1024}, {"WPT",2} } },
-        { "default",                                         { {"VW",2}, {"WGS",1024}, {"WPT",2} } },
-      }
-    },
-    { // NVIDIA GPUs
-      kDeviceTypeGPU, "NVIDIA", {
-        { "GRID K520",                                       { {"VW",2}, {"WGS",64}, {"WPT",1} } },
-        { "GeForce GTX 480",                                 { {"VW",4}, {"WGS",64}, {"WPT",1} } },
-        { "GeForce GTX 680",                                 { {"VW",2}, {"WGS",64}, {"WPT",1} } },
-        { "GeForce GTX 750 Ti",                              { {"VW",1}, {"WGS",1024}, {"WPT",1} } },
-        { "GeForce GTX 980",                                 { {"VW",1}, {"WGS",1024}, {"WPT",1} } },
-        { "GeForce GTX TITAN",                               { {"VW",4}, {"WGS",256}, {"WPT",1} } },
-        { "GeForce GTX TITAN X",                             { {"VW",1}, {"WGS",64}, {"WPT",1} } },
-        { "Tesla K20m",                                      { {"VW",4}, {"WGS",128}, {"WPT",1} } },
-        { "Tesla K40m",                                      { {"VW",4}, {"WGS",128}, {"WPT",1} } },
-        { "default",                                         { {"VW",1}, {"WGS",64}, {"WPT",1} } },
-      }
-    },
-    { // Default
-      kDeviceTypeAll, "default", {
-        { "default",                                         { {"VW",1}, {"WGS",64}, {"WPT",1} } },
-      }
-    },
-  }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry Database::XaxpyComplexSingle = {
-  "Xaxpy", Precision::kComplexSingle, {
-    { // AMD GPUs
-      kDeviceTypeGPU, "AMD", {
-        { "AMD Radeon R9 M370X Compute Engine",              { {"VW",2}, {"WGS",64}, {"WPT",8} } },
-        { "Hawaii",                                          { {"VW",1}, {"WGS",128}, {"WPT",2} } },
-        { "Pitcairn",                                        { {"VW",1}, {"WGS",64}, {"WPT",1} } },
-        { "Tahiti",                                          { {"VW",1}, {"WGS",64}, {"WPT",1} } },
-        { "default",                                         { {"VW",1}, {"WGS",64}, {"WPT",1} } },
-      }
-    },
-    { // ARM GPUs
-      kDeviceTypeGPU, "ARM", {
-        { "Mali-T628",                                       { {"VW",1}, {"WGS",256}, {"WPT",1} } },
-        { "default",                                         { {"VW",1}, {"WGS",256}, {"WPT",1} } },
-      }
-    },
-    { // Intel CPUs
-      kDeviceTypeCPU, "Intel", {
-        { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz",        { {"VW",4}, {"WGS",256}, {"WPT",1} } },
-        { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz",         { {"VW",1}, {"WGS",1024}, {"WPT",2} } },
-        { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz",        { {"VW",2}, {"WGS",1024}, {"WPT",1} } },
-        { "default",                                         { {"VW",1}, {"WGS",256}, {"WPT",1} } },
-      }
-    },
-    { // Intel GPUs
-      kDeviceTypeGPU, "Intel", {
-        { "Intel(R) HD Graphics Skylake ULT GT2",            { {"VW",2}, {"WGS",512}, {"WPT",1} } },
-        { "Iris",                                            { {"VW",2}, {"WGS",128}, {"WPT",1} } },
-        { "Iris Pro",                                        { {"VW",1}, {"WGS",256}, {"WPT",8} } },
-        { "default",                                         { {"VW",1}, {"WGS",128}, {"WPT",1} } },
-      }
-    },
-    { // Intel accelerators
-      kDeviceTypeAccelerator, "Intel", {
-        { "Intel(R) Many Integrated Core Acceleration Card", { {"VW",1}, {"WGS",1024}, {"WPT",1} } },
-        { "default",                                         { {"VW",1}, {"WGS",1024}, {"WPT",1} } },
-      }
-    },
-    { // NVIDIA GPUs
-      kDeviceTypeGPU, "NVIDIA", {
-        { "GRID K520",                                       { {"VW",1}, {"WGS",512}, {"WPT",1} } },
-        { "GeForce GTX 480",                                 { {"VW",1}, {"WGS",256}, {"WPT",1} } },
-        { "GeForce GTX 680",                                 { {"VW",1}, {"WGS",256}, {"WPT",1} } },
-        { "GeForce GTX 750 Ti",                              { {"VW",1}, {"WGS",512}, {"WPT",1} } },
-        { "GeForce GTX 980",                                 { {"VW",1}, {"WGS",64}, {"WPT",1} } },
-        { "GeForce GTX TITAN",                               { {"VW",1}, {"WGS",256}, {"WPT",1} } },
-        { "GeForce GTX TITAN X",                             { {"VW",1}, {"WGS",512}, {"WPT",1} } },
-        { "Tesla K20m",                                      { {"VW",1}, {"WGS",128}, {"WPT",1} } },
-        { "Tesla K40m",                                      { {"VW",1}, {"WGS",128}, {"WPT",1} } },
-        { "default",                                         { {"VW",1}, {"WGS",64}, {"WPT",1} } },
-      }
-    },
-    { // Default
-      kDeviceTypeAll, "default", {
-        { "default",                                         { {"VW",1}, {"WGS",64}, {"WPT",1} } },
-      }
-    },
-  }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry Database::XaxpyDouble = {
-  "Xaxpy", Precision::kDouble, {
-    { // AMD GPUs
-      kDeviceTypeGPU, "AMD", {
-        { "AMD Radeon R9 M370X Compute Engine",              { {"VW",1}, {"WGS",256}, {"WPT",1} } },
-        { "Hawaii",                                          { {"VW",1}, {"WGS",64}, {"WPT",2} } },
-        { "Pitcairn",                                        { {"VW",1}, {"WGS",128}, {"WPT",1} } },
-        { "Tahiti",                                          { {"VW",1}, {"WGS",64}, {"WPT",1} } },
-        { "default",                                         { {"VW",1}, {"WGS",64}, {"WPT",1} } },
-      }
-    },
-    { // ARM GPUs
-      kDeviceTypeGPU, "ARM", {
-        { "Mali-T628",                                       { {"VW",2}, {"WGS",128}, {"WPT",2} } },
-        { "default",                                         { {"VW",2}, {"WGS",128}, {"WPT",2} } },
-      }
-    },
-    { // Intel CPUs
-      kDeviceTypeCPU, "Intel", {
-        { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz",        { {"VW",1}, {"WGS",1024}, {"WPT",1} } },
-        { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz",         { {"VW",8}, {"WGS",64}, {"WPT",1} } },
-        { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz",        { {"VW",8}, {"WGS",2048}, {"WPT",1} } },
-        { "default",                                         { {"VW",1}, {"WGS",64}, {"WPT",1} } },
-      }
-    },
-    { // Intel accelerators
-      kDeviceTypeAccelerator, "Intel", {
-        { "Intel(R) Many Integrated Core Acceleration Card", { {"VW",2}, {"WGS",512}, {"WPT",1} } },
-        { "default",                                         { {"VW",2}, {"WGS",512}, {"WPT",1} } },
-      }
-    },
-    { // NVIDIA GPUs
-      kDeviceTypeGPU, "NVIDIA", {
-        { "GRID K520",                                       { {"VW",1}, {"WGS",64}, {"WPT",1} } },
-        { "GeForce GTX 480",                                 { {"VW",2}, {"WGS",64}, {"WPT",1} } },
-        { "GeForce GTX 680",                                 { {"VW",1}, {"WGS",64}, {"WPT",1} } },
-        { "GeForce GTX 750 Ti",                              { {"VW",1}, {"WGS",64}, {"WPT",1} } },
-        { "GeForce GTX 980",                                 { {"VW",1}, {"WGS",256}, {"WPT",1} } },
-        { "GeForce GTX TITAN",                               { {"VW",2}, {"WGS",1024}, {"WPT",1} } },
-        { "GeForce GTX TITAN X",                             { {"VW",1}, {"WGS",512}, {"WPT",1} } },
-        { "Tesla K20m",                                      { {"VW",2}, {"WGS",128}, {"WPT",1} } },
-        { "Tesla K40m",                                      { {"VW",2}, {"WGS",128}, {"WPT",1} } },
-        { "default",                                         { {"VW",1}, {"WGS",64}, {"WPT",1} } },
-      }
-    },
-    { // Default
-      kDeviceTypeAll, "default", {
-        { "default",                                         { {"VW",1}, {"WGS",64}, {"WPT",1} } },
-      }
-    },
-  }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry Database::XaxpyComplexDouble = {
-  "Xaxpy", Precision::kComplexDouble, {
-    { // AMD GPUs
-      kDeviceTypeGPU, "AMD", {
-        { "AMD Radeon R9 M370X Compute Engine",              { {"VW",1}, {"WGS",128}, {"WPT",1} } },
-        { "Hawaii",                                          { {"VW",2}, {"WGS",64}, {"WPT",1} } },
-        { "Pitcairn",                                        { {"VW",1}, {"WGS",128}, {"WPT",1} } },
-        { "Tahiti",                                          { {"VW",1}, {"WGS",128}, {"WPT",1} } },
-        { "default",                                         { {"VW",1}, {"WGS",64}, {"WPT",1} } },
-      }
-    },
-    { // ARM GPUs
-      kDeviceTypeGPU, "ARM", {
-        { "Mali-T628",                                       { {"VW",1}, {"WGS",64}, {"WPT",8} } },
-        { "default",                                         { {"VW",1}, {"WGS",64}, {"WPT",8} } },
-      }
-    },
-    { // Intel CPUs
-      kDeviceTypeCPU, "Intel", {
-        { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz",        { {"VW",8}, {"WGS",128}, {"WPT",1} } },
-        { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz",         { {"VW",8}, {"WGS",512}, {"WPT",1} } },
-        { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz",        { {"VW",1}, {"WGS",256}, {"WPT",1} } },
-        { "default",                                         { {"VW",1}, {"WGS",128}, {"WPT",1} } },
-      }
-    },
-    { // Intel accelerators
-      kDeviceTypeAccelerator, "Intel", {
-        { "Intel(R) Many Integrated Core Acceleration Card", { {"VW",1}, {"WGS",1024}, {"WPT",1} } },
-        { "default",                                         { {"VW",1}, {"WGS",1024}, {"WPT",1} } },
-      }
-    },
-    { // NVIDIA GPUs
-      kDeviceTypeGPU, "NVIDIA", {
-        { "GRID K520",                                       { {"VW",1}, {"WGS",64}, {"WPT",1} } },
-        { "GeForce GTX 480",                                 { {"VW",1}, {"WGS",128}, {"WPT",1} } },
-        { "GeForce GTX 680",                                 { {"VW",1}, {"WGS",64}, {"WPT",1} } },
-        { "GeForce GTX 750 Ti",                              { {"VW",1}, {"WGS",256}, {"WPT",2} } },
-        { "GeForce GTX 980",                                 { {"VW",1}, {"WGS",1024}, {"WPT",1} } },
-        { "GeForce GTX TITAN",                               { {"VW",1}, {"WGS",64}, {"WPT",4} } },
-        { "GeForce GTX TITAN X",                             { {"VW",1}, {"WGS",1024}, {"WPT",1} } },
-        { "Tesla K20m",                                      { {"VW",1}, {"WGS",64}, {"WPT",1} } },
-        { "Tesla K40m",                                      { {"VW",1}, {"WGS",64}, {"WPT",1} } },
-        { "default",                                         { {"VW",1}, {"WGS",64}, {"WPT",1} } },
-      }
-    },
-    { // Default
-      kDeviceTypeAll, "default", {
-        { "default",                                         { {"VW",1}, {"WGS",64}, {"WPT",1} } },
-      }
-    },
-  }
-};
-
-// =================================================================================================
-} // namespace clblast
diff --git a/include/internal/database/xdot.h b/include/internal/database/xdot.h
deleted file mode 100644
index d09d8c62..00000000
--- a/include/internal/database/xdot.h
+++ /dev/null
@@ -1,200 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-//   Database generator <database.py>
-//
-// This file populates the database with best-found tuning parameters for the 'Xdot' kernels.
-//
-// =================================================================================================
-
-namespace clblast {
-// =================================================================================================
-
-const Database::DatabaseEntry Database::XdotHalf = {
-  "Xdot", Precision::kHalf, {
-    { // Intel GPUs
-      kDeviceTypeGPU, "Intel", {
-        { "Intel(R) HD Graphics Skylake ULT GT2",            { {"WGS1",32}, {"WGS2",32} } },
-        { "default",                                         { {"WGS1",32}, {"WGS2",32} } },
-      }
-    },
-    { // Default
-      kDeviceTypeAll, "default", {
-        { "default",                                         { {"WGS1",32}, {"WGS2",32} } },
-      }
-    },
-  }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry Database::XdotSingle = {
-  "Xdot", Precision::kSingle, {
-    { // AMD GPUs
-      kDeviceTypeGPU, "AMD", {
-        { "AMD Radeon R9 M370X Compute Engine",              { {"WGS1",128}, {"WGS2",32} } },
-        { "Hawaii",                                          { {"WGS1",256}, {"WGS2",32} } },
-        { "Pitcairn",                                        { {"WGS1",128}, {"WGS2",32} } },
-        { "Tahiti",                                          { {"WGS1",128}, {"WGS2",32} } },
-        { "default",                                         { {"WGS1",128}, {"WGS2",32} } },
-      }
-    },
-    { // Intel CPUs
-      kDeviceTypeCPU, "Intel", {
-        { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz",        { {"WGS1",1024}, {"WGS2",32} } },
-        { "default",                                         { {"WGS1",1024}, {"WGS2",32} } },
-      }
-    },
-    { // Intel GPUs
-      kDeviceTypeGPU, "Intel", {
-        { "Intel(R) HD Graphics Skylake ULT GT2",            { {"WGS1",64}, {"WGS2",32} } },
-        { "Iris Pro",                                        { {"WGS1",512}, {"WGS2",64} } },
-        { "default",                                         { {"WGS1",64}, {"WGS2",32} } },
-      }
-    },
-    { // NVIDIA GPUs
-      kDeviceTypeGPU, "NVIDIA", {
-        { "GRID K520",                                       { {"WGS1",128}, {"WGS2",32} } },
-        { "GeForce GTX 480",                                 { {"WGS1",512}, {"WGS2",32} } },
-        { "GeForce GTX 680",                                 { {"WGS1",128}, {"WGS2",128} } },
-        { "GeForce GTX 980",                                 { {"WGS1",256}, {"WGS2",32} } },
-        { "GeForce GTX TITAN X",                             { {"WGS1",256}, {"WGS2",32} } },
-        { "Tesla K20m",                                      { {"WGS1",1024}, {"WGS2",32} } },
-        { "default",                                         { {"WGS1",128}, {"WGS2",32} } },
-      }
-    },
-    { // Default
-      kDeviceTypeAll, "default", {
-        { "default",                                         { {"WGS1",64}, {"WGS2",32} } },
-      }
-    },
-  }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry Database::XdotComplexSingle = {
-  "Xdot", Precision::kComplexSingle, {
-    { // AMD GPUs
-      kDeviceTypeGPU, "AMD", {
-        { "AMD Radeon R9 M370X Compute Engine",              { {"WGS1",64}, {"WGS2",32} } },
-        { "Hawaii",                                          { {"WGS1",256}, {"WGS2",32} } },
-        { "Pitcairn",                                        { {"WGS1",256}, {"WGS2",32} } },
-        { "Tahiti",                                          { {"WGS1",64}, {"WGS2",32} } },
-        { "default",                                         { {"WGS1",64}, {"WGS2",32} } },
-      }
-    },
-    { // Intel CPUs
-      kDeviceTypeCPU, "Intel", {
-        { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz",        { {"WGS1",1024}, {"WGS2",32} } },
-        { "default",                                         { {"WGS1",1024}, {"WGS2",32} } },
-      }
-    },
-    { // Intel GPUs
-      kDeviceTypeGPU, "Intel", {
-        { "Intel(R) HD Graphics Skylake ULT GT2",            { {"WGS1",32}, {"WGS2",32} } },
-        { "Iris Pro",                                        { {"WGS1",32}, {"WGS2",32} } },
-        { "default",                                         { {"WGS1",32}, {"WGS2",32} } },
-      }
-    },
-    { // NVIDIA GPUs
-      kDeviceTypeGPU, "NVIDIA", {
-        { "GRID K520",                                       { {"WGS1",64}, {"WGS2",32} } },
-        { "GeForce GTX 480",                                 { {"WGS1",512}, {"WGS2",32} } },
-        { "GeForce GTX 680",                                 { {"WGS1",128}, {"WGS2",64} } },
-        { "GeForce GTX 980",                                 { {"WGS1",256}, {"WGS2",64} } },
-        { "GeForce GTX TITAN X",                             { {"WGS1",256}, {"WGS2",32} } },
-        { "Tesla K20m",                                      { {"WGS1",512}, {"WGS2",32} } },
-        { "default",                                         { {"WGS1",64}, {"WGS2",32} } },
-      }
-    },
-    { // Default
-      kDeviceTypeAll, "default", {
-        { "default",                                         { {"WGS1",32}, {"WGS2",32} } },
-      }
-    },
-  }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry Database::XdotDouble = {
-  "Xdot", Precision::kDouble, {
-    { // AMD GPUs
-      kDeviceTypeGPU, "AMD", {
-        { "AMD Radeon R9 M370X Compute Engine",              { {"WGS1",64}, {"WGS2",128} } },
-        { "Hawaii",                                          { {"WGS1",256}, {"WGS2",32} } },
-        { "Pitcairn",                                        { {"WGS1",128}, {"WGS2",32} } },
-        { "Tahiti",                                          { {"WGS1",256}, {"WGS2",32} } },
-        { "default",                                         { {"WGS1",64}, {"WGS2",32} } },
-      }
-    },
-    { // Intel CPUs
-      kDeviceTypeCPU, "Intel", {
-        { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz",        { {"WGS1",512}, {"WGS2",64} } },
-        { "default",                                         { {"WGS1",512}, {"WGS2",64} } },
-      }
-    },
-    { // NVIDIA GPUs
-      kDeviceTypeGPU, "NVIDIA", {
-        { "GRID K520",                                       { {"WGS1",128}, {"WGS2",32} } },
-        { "GeForce GTX 480",                                 { {"WGS1",512}, {"WGS2",32} } },
-        { "GeForce GTX 680",                                 { {"WGS1",128}, {"WGS2",64} } },
-        { "GeForce GTX 980",                                 { {"WGS1",128}, {"WGS2",32} } },
-        { "GeForce GTX TITAN X",                             { {"WGS1",256}, {"WGS2",32} } },
-        { "Tesla K20m",                                      { {"WGS1",512}, {"WGS2",32} } },
-        { "default",                                         { {"WGS1",128}, {"WGS2",32} } },
-      }
-    },
-    { // Default
-      kDeviceTypeAll, "default", {
-        { "default",                                         { {"WGS1",64}, {"WGS2",32} } },
-      }
-    },
-  }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry Database::XdotComplexDouble = {
-  "Xdot", Precision::kComplexDouble, {
-    { // AMD GPUs
-      kDeviceTypeGPU, "AMD", {
-        { "AMD Radeon R9 M370X Compute Engine",              { {"WGS1",64}, {"WGS2",32} } },
-        { "Hawaii",                                          { {"WGS1",256}, {"WGS2",32} } },
-        { "Pitcairn",                                        { {"WGS1",256}, {"WGS2",32} } },
-        { "Tahiti",                                          { {"WGS1",256}, {"WGS2",32} } },
-        { "default",                                         { {"WGS1",64}, {"WGS2",32} } },
-      }
-    },
-    { // Intel CPUs
-      kDeviceTypeCPU, "Intel", {
-        { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz",        { {"WGS1",1024}, {"WGS2",32} } },
-        { "default",                                         { {"WGS1",1024}, {"WGS2",32} } },
-      }
-    },
-    { // NVIDIA GPUs
-      kDeviceTypeGPU, "NVIDIA", {
-        { "GRID K520",                                       { {"WGS1",64}, {"WGS2",32} } },
-        { "GeForce GTX 480",                                 { {"WGS1",512}, {"WGS2",32} } },
-        { "GeForce GTX 680",                                 { {"WGS1",256}, {"WGS2",64} } },
-        { "GeForce GTX 980",                                 { {"WGS1",64}, {"WGS2",32} } },
-        { "GeForce GTX TITAN X",                             { {"WGS1",128}, {"WGS2",32} } },
-        { "Tesla K20m",                                      { {"WGS1",128}, {"WGS2",32} } },
-        { "default",                                         { {"WGS1",64}, {"WGS2",32} } },
-      }
-    },
-    { // Default
-      kDeviceTypeAll, "default", {
-        { "default",                                         { {"WGS1",64}, {"WGS2",32} } },
-      }
-    },
-  }
-};
-
-// =================================================================================================
-} // namespace clblast
diff --git a/include/internal/database/xgemm.h b/include/internal/database/xgemm.h
deleted file mode 100644
index f35d2c88..00000000
--- a/include/internal/database/xgemm.h
+++ /dev/null
@@ -1,263 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-//   Database generator <database.py>
-//
-// This file populates the database with best-found tuning parameters for the 'Xgemm' kernels.
-//
-// =================================================================================================
-
-namespace clblast {
-// =================================================================================================
-
-const Database::DatabaseEntry Database::XgemmHalf = {
-  "Xgemm", Precision::kHalf, {
-    { // Default
-      kDeviceTypeAll, "default", {
-        { "default",                                         { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
-      }
-    },
-  }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry Database::XgemmSingle = {
-  "Xgemm", Precision::kSingle, {
-    { // AMD GPUs
-      kDeviceTypeGPU, "AMD", {
-        { "AMD Radeon R9 M370X Compute Engine",              { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",16}, {"NWG",128}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",2}, {"VWN",8} } },
-        { "Hawaii",                                          { {"KWG",16}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",32}, {"MWG",128}, {"NDIMB",32}, {"NDIMC",8}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",4}, {"VWN",2} } },
-        { "Pitcairn",                                        { {"KWG",16}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",128}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
-        { "Tahiti",                                          { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",32}, {"MWG",128}, {"NDIMB",16}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",4}, {"VWN",1} } },
-        { "default",                                         { {"KWG",16}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
-      }
-    },
-    { // ARM GPUs
-      kDeviceTypeGPU, "ARM", {
-        { "Mali-T628",                                       { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",16}, {"NWG",16}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",1}, {"VWM",8}, {"VWN",1} } },
-        { "default",                                         { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",16}, {"NWG",16}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",1}, {"VWM",8}, {"VWN",1} } },
-      }
-    },
-    { // Intel CPUs
-      kDeviceTypeCPU, "Intel", {
-        { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz",        { {"KWG",32}, {"KWI",8}, {"MDIMA",32}, {"MDIMC",32}, {"MWG",64}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",2}, {"VWN",2} } },
-        { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz",         { {"KWG",32}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",8}, {"MWG",128}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",128}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",2}, {"VWN",8} } },
-        { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz",        { {"KWG",32}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",32}, {"NDIMC",32}, {"NWG",64}, {"SA",0}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",2} } },
-        { "default",                                         { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",2} } },
-      }
-    },
-    { // Intel GPUs
-      kDeviceTypeGPU, "Intel", {
-        { "Intel(R) HD Graphics Skylake ULT GT2",            { {"KWG",32}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",128}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",1}, {"VWM",1}, {"VWN",8} } },
-        { "Iris",                                            { {"KWG",16}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",128}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",4}, {"VWN",1} } },
-        { "Iris Pro",                                        { {"KWG",32}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",64}, {"SA",1}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",4}, {"VWN",4} } },
-        { "default",                                         { {"KWG",16}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
-      }
-    },
-    { // Intel accelerators
-      kDeviceTypeAccelerator, "Intel", {
-        { "Intel(R) Many Integrated Core Acceleration Card", { {"KWG",32}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",32}, {"MWG",32}, {"NDIMB",32}, {"NDIMC",8}, {"NWG",128}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",4} } },
-        { "default",                                         { {"KWG",32}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",32}, {"MWG",32}, {"NDIMB",32}, {"NDIMC",8}, {"NWG",128}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",4} } },
-      }
-    },
-    { // NVIDIA GPUs
-      kDeviceTypeGPU, "NVIDIA", {
-        { "GRID K520",                                       { {"KWG",16}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",2}, {"VWN",4} } },
-        { "GeForce GTX 480",                                 { {"KWG",16}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",2}, {"VWN",2} } },
-        { "GeForce GTX 680",                                 { {"KWG",32}, {"KWI",8}, {"MDIMA",8}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",128}, {"SA",1}, {"SB",1}, {"STRM",0}, {"STRN",0}, {"VWM",4}, {"VWN",2} } },
-        { "GeForce GTX 750 Ti",                              { {"KWG",16}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",16}, {"MWG",128}, {"NDIMB",16}, {"NDIMC",8}, {"NWG",128}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",4}, {"VWN",4} } },
-        { "GeForce GTX 980",                                 { {"KWG",16}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",16}, {"NDIMC",8}, {"NWG",128}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",4}, {"VWN",8} } },
-        { "GeForce GTX TITAN",                               { {"KWG",16}, {"KWI",8}, {"MDIMA",32}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",2}, {"VWN",2} } },
-        { "GeForce GTX TITAN X",                             { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",16}, {"MWG",128}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",128}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",4}, {"VWN",8} } },
-        { "Tesla K20m",                                      { {"KWG",16}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",16}, {"NDIMC",8}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",2}, {"VWN",4} } },
-        { "Tesla K40m",                                      { {"KWG",16}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",128}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",2}, {"VWN",4} } },
-        { "default",                                         { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",0}, {"STRN",0}, {"VWM",2}, {"VWN",2} } },
-      }
-    },
-    { // Default
-      kDeviceTypeAll, "default", {
-        { "default",                                         { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
-      }
-    },
-  }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry Database::XgemmComplexSingle = {
-  "Xgemm", Precision::kComplexSingle, {
-    { // AMD GPUs
-      kDeviceTypeGPU, "AMD", {
-        { "AMD Radeon R9 M370X Compute Engine",              { {"KWG",32}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",32}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",1}, {"VWM",2}, {"VWN",8} } },
-        { "Hawaii",                                          { {"KWG",32}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",16}, {"NWG",32}, {"SA",1}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
-        { "Pitcairn",                                        { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",32}, {"SA",0}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",4}, {"VWN",2} } },
-        { "Tahiti",                                          { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",16}, {"NWG",32}, {"SA",1}, {"SB",0}, {"STRM",0}, {"STRN",1}, {"VWM",2}, {"VWN",1} } },
-        { "default",                                         { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
-      }
-    },
-    { // ARM GPUs
-      kDeviceTypeGPU, "ARM", {
-        { "Mali-T628",                                       { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",16}, {"MWG",128}, {"NDIMB",16}, {"NDIMC",8}, {"NWG",128}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",1}, {"VWM",8}, {"VWN",1} } },
-        { "default",                                         { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",16}, {"MWG",128}, {"NDIMB",16}, {"NDIMC",8}, {"NWG",128}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",1}, {"VWM",8}, {"VWN",1} } },
-      }
-    },
-    { // Intel CPUs
-      kDeviceTypeCPU, "Intel", {
-        { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz",        { {"KWG",32}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",16}, {"MWG",32}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",64}, {"SA",0}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",2} } },
-        { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz",         { {"KWG",32}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",128}, {"NDIMB",16}, {"NDIMC",32}, {"NWG",128}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",4} } },
-        { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz",        { {"KWG",32}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",16}, {"MWG",16}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",128}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",1}, {"VWM",1}, {"VWN",4} } },
-        { "default",                                         { {"KWG",32}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",16}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",2} } },
-      }
-    },
-    { // Intel GPUs
-      kDeviceTypeGPU, "Intel", {
-        { "Intel(R) HD Graphics Skylake ULT GT2",            { {"KWG",32}, {"KWI",8}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",32}, {"SA",1}, {"SB",0}, {"STRM",0}, {"STRN",1}, {"VWM",4}, {"VWN",1} } },
-        { "Iris",                                            { {"KWG",32}, {"KWI",8}, {"MDIMA",32}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
-        { "Iris Pro",                                        { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",32}, {"NDIMC",8}, {"NWG",32}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",1}, {"VWN",1} } },
-        { "default",                                         { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",32}, {"SA",1}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
-      }
-    },
-    { // Intel accelerators
-      kDeviceTypeAccelerator, "Intel", {
-        { "Intel(R) Many Integrated Core Acceleration Card", { {"KWG",32}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",32}, {"MWG",32}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",128}, {"SA",1}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",4} } },
-        { "default",                                         { {"KWG",32}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",32}, {"MWG",32}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",128}, {"SA",1}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",4} } },
-      }
-    },
-    { // NVIDIA GPUs
-      kDeviceTypeGPU, "NVIDIA", {
-        { "GRID K520",                                       { {"KWG",16}, {"KWI",8}, {"MDIMA",32}, {"MDIMC",32}, {"MWG",64}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",128}, {"SA",1}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",4} } },
-        { "GeForce GTX 480",                                 { {"KWG",16}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",16}, {"MWG",32}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",128}, {"SA",0}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",2}, {"VWN",2} } },
-        { "GeForce GTX 680",                                 { {"KWG",16}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",32}, {"NDIMC",32}, {"NWG",128}, {"SA",1}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",2}, {"VWN",2} } },
-        { "GeForce GTX 750 Ti",                              { {"KWG",16}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",8}, {"MWG",128}, {"NDIMB",16}, {"NDIMC",32}, {"NWG",128}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",4} } },
-        { "GeForce GTX 980",                                 { {"KWG",32}, {"KWI",8}, {"MDIMA",32}, {"MDIMC",32}, {"MWG",64}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",2}, {"VWN",1} } },
-        { "GeForce GTX TITAN",                               { {"KWG",16}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
-        { "GeForce GTX TITAN X",                             { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",32}, {"SA",1}, {"SB",0}, {"STRM",1}, {"STRN",1}, {"VWM",1}, {"VWN",4} } },
-        { "Tesla K20m",                                      { {"KWG",32}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",4} } },
-        { "Tesla K40m",                                      { {"KWG",16}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",32}, {"MWG",32}, {"NDIMB",32}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",1}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
-        { "default",                                         { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
-      }
-    },
-    { // Default
-      kDeviceTypeAll, "default", {
-        { "default",                                         { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",16}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
-      }
-    },
-  }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry Database::XgemmDouble = {
-  "Xgemm", Precision::kDouble, {
-    { // AMD GPUs
-      kDeviceTypeGPU, "AMD", {
-        { "AMD Radeon R9 M370X Compute Engine",              { {"KWG",32}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",32}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",1}, {"VWM",2}, {"VWN",8} } },
-        { "Hawaii",                                          { {"KWG",16}, {"KWI",8}, {"MDIMA",32}, {"MDIMC",8}, {"MWG",128}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",32}, {"SA",0}, {"SB",1}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",4} } },
-        { "Pitcairn",                                        { {"KWG",32}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",16}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",2} } },
-        { "Tahiti",                                          { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",16}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",1}, {"VWM",1}, {"VWN",4} } },
-        { "default",                                         { {"KWG",16}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",16}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",2} } },
-      }
-    },
-    { // ARM GPUs
-      kDeviceTypeGPU, "ARM", {
-        { "Mali-T628",                                       { {"KWG",32}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",16}, {"SA",0}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",8}, {"VWN",2} } },
-        { "default",                                         { {"KWG",32}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",16}, {"SA",0}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",8}, {"VWN",2} } },
-      }
-    },
-    { // Intel CPUs
-      kDeviceTypeCPU, "Intel", {
-        { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz",        { {"KWG",32}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",16}, {"MWG",128}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",64}, {"SA",0}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",2} } },
-        { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz",         { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",128}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",64}, {"SA",1}, {"SB",0}, {"STRM",0}, {"STRN",1}, {"VWM",2}, {"VWN",8} } },
-        { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz",        { {"KWG",32}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",16}, {"MWG",128}, {"NDIMB",16}, {"NDIMC",8}, {"NWG",128}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",1}, {"VWM",1}, {"VWN",8} } },
-        { "default",                                         { {"KWG",32}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",128}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",2} } },
-      }
-    },
-    { // Intel accelerators
-      kDeviceTypeAccelerator, "Intel", {
-        { "Intel(R) Many Integrated Core Acceleration Card", { {"KWG",32}, {"KWI",8}, {"MDIMA",8}, {"MDIMC",16}, {"MWG",16}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",128}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",4} } },
-        { "default",                                         { {"KWG",32}, {"KWI",8}, {"MDIMA",8}, {"MDIMC",16}, {"MWG",16}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",128}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",4} } },
-      }
-    },
-    { // NVIDIA GPUs
-      kDeviceTypeGPU, "NVIDIA", {
-        { "GRID K520",                                       { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",16}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",32}, {"SA",1}, {"SB",0}, {"STRM",0}, {"STRN",1}, {"VWM",2}, {"VWN",2} } },
-        { "GeForce GTX 480",                                 { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",16}, {"MWG",32}, {"NDIMB",32}, {"NDIMC",8}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",2} } },
-        { "GeForce GTX 680",                                 { {"KWG",32}, {"KWI",8}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",16}, {"NDIMC",32}, {"NWG",128}, {"SA",1}, {"SB",0}, {"STRM",0}, {"STRN",1}, {"VWM",2}, {"VWN",4} } },
-        { "GeForce GTX 750 Ti",                              { {"KWG",16}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",16}, {"NDIMC",32}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",1}, {"VWM",2}, {"VWN",1} } },
-        { "GeForce GTX 980",                                 { {"KWG",32}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",32}, {"NDIMC",32}, {"NWG",128}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",2}, {"VWN",4} } },
-        { "GeForce GTX TITAN",                               { {"KWG",16}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",16}, {"NDIMC",32}, {"NWG",128}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",2}, {"VWN",2} } },
-        { "GeForce GTX TITAN X",                             { {"KWG",16}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",16}, {"MWG",16}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",16}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
-        { "Tesla K20m",                                      { {"KWG",16}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
-        { "Tesla K40m",                                      { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",16}, {"NDIMC",32}, {"NWG",128}, {"SA",1}, {"SB",0}, {"STRM",1}, {"STRN",1}, {"VWM",2}, {"VWN",4} } },
-        { "default",                                         { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",16}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",16}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
-      }
-    },
-    { // Default
-      kDeviceTypeAll, "default", {
-        { "default",                                         { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",16}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",16}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
-      }
-    },
-  }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry Database::XgemmComplexDouble = {
-  "Xgemm", Precision::kComplexDouble, {
-    { // AMD GPUs
-      kDeviceTypeGPU, "AMD", {
-        { "AMD Radeon R9 M370X Compute Engine",              { {"KWG",32}, {"KWI",8}, {"MDIMA",8}, {"MDIMC",16}, {"MWG",32}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",1}, {"VWM",2}, {"VWN",2} } },
-        { "Hawaii",                                          { {"KWG",16}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",16}, {"MWG",16}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",32}, {"SA",1}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",2} } },
-        { "Pitcairn",                                        { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",32}, {"NWG",32}, {"SA",0}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
-        { "Tahiti",                                          { {"KWG",16}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",16}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",16}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
-        { "default",                                         { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",16}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",16}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
-      }
-    },
-    { // ARM GPUs
-      kDeviceTypeGPU, "ARM", {
-        { "Mali-T628",                                       { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",32}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",8}, {"VWN",1} } },
-        { "default",                                         { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",32}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",8}, {"VWN",1} } },
-      }
-    },
-    { // Intel CPUs
-      kDeviceTypeCPU, "Intel", {
-        { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz",        { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",32}, {"MWG",128}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",64}, {"SA",0}, {"SB",1}, {"STRM",0}, {"STRN",0}, {"VWM",2}, {"VWN",4} } },
-        { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz",         { {"KWG",32}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",128}, {"NDIMB",8}, {"NDIMC",16}, {"NWG",128}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",1}, {"VWM",1}, {"VWN",8} } },
-        { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz",        { {"KWG",32}, {"KWI",8}, {"MDIMA",8}, {"MDIMC",32}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",32}, {"SA",0}, {"SB",1}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",2} } },
-        { "default",                                         { {"KWG",32}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",2} } },
-      }
-    },
-    { // Intel accelerators
-      kDeviceTypeAccelerator, "Intel", {
-        { "Intel(R) Many Integrated Core Acceleration Card", { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",16}, {"MWG",16}, {"NDIMB",16}, {"NDIMC",8}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
-        { "default",                                         { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",16}, {"MWG",16}, {"NDIMB",16}, {"NDIMC",8}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
-      }
-    },
-    { // NVIDIA GPUs
-      kDeviceTypeGPU, "NVIDIA", {
-        { "GRID K520",                                       { {"KWG",32}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",16}, {"MWG",16}, {"NDIMB",8}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",0}, {"STRM",1}, {"STRN",1}, {"VWM",1}, {"VWN",1} } },
-        { "GeForce GTX 480",                                 { {"KWG",16}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",32}, {"MWG",32}, {"NDIMB",32}, {"NDIMC",8}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
-        { "GeForce GTX 680",                                 { {"KWG",16}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",16}, {"NDIMC",32}, {"NWG",32}, {"SA",0}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
-        { "GeForce GTX 750 Ti",                              { {"KWG",16}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",16}, {"MWG",16}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",4} } },
-        { "GeForce GTX 980",                                 { {"KWG",16}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",16}, {"NWG",128}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",1}, {"VWM",2}, {"VWN",2} } },
-        { "GeForce GTX TITAN X",                             { {"KWG",32}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",16}, {"MWG",128}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
-        { "Tesla K20m",                                      { {"KWG",32}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
-        { "Tesla K40m",                                      { {"KWG",16}, {"KWI",8}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
-        { "default",                                         { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",16}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
-      }
-    },
-    { // Default
-      kDeviceTypeAll, "default", {
-        { "default",                                         { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",16}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",16}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
-      }
-    },
-  }
-};
-
-// =================================================================================================
-} // namespace clblast
diff --git a/include/internal/database/xgemv.h b/include/internal/database/xgemv.h
deleted file mode 100644
index 6b76c8ac..00000000
--- a/include/internal/database/xgemv.h
+++ /dev/null
@@ -1,231 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-//   Database generator <database.py>
-//
-// This file populates the database with best-found tuning parameters for the 'Xgemv' kernels.
-//
-// =================================================================================================
-
-namespace clblast {
-// =================================================================================================
-
-const Database::DatabaseEntry Database::XgemvHalf = {
-  "Xgemv", Precision::kHalf, {
-    { // Intel GPUs
-      kDeviceTypeGPU, "Intel", {
-        { "Intel(R) HD Graphics Skylake ULT GT2",            { {"WGS1",128}, {"WPT1",1}, {"VW2",2}, {"WGS2",128}, {"WPT2",2}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
-        { "default",                                         { {"WGS1",128}, {"WPT1",1}, {"VW2",2}, {"WGS2",128}, {"WPT2",2}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
-      }
-    },
-    { // Default
-      kDeviceTypeAll, "default", {
-        { "default",                                         { {"WGS1",128}, {"WPT1",1}, {"VW2",2}, {"WGS2",128}, {"WPT2",2}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
-      }
-    },
-  }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry Database::XgemvSingle = {
-  "Xgemv", Precision::kSingle, {
-    { // AMD GPUs
-      kDeviceTypeGPU, "AMD", {
-        { "AMD Radeon R9 M370X Compute Engine",              { {"WGS1",128}, {"WPT1",1}, {"VW2",1}, {"WGS2",128}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
-        { "Hawaii",                                          { {"WGS1",128}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
-        { "Pitcairn",                                        { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
-        { "Tahiti",                                          { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
-        { "default",                                         { {"WGS1",128}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
-      }
-    },
-    { // Intel CPUs
-      kDeviceTypeCPU, "Intel", {
-        { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz",        { {"WGS1",64}, {"WPT1",1}, {"VW2",4}, {"WGS2",128}, {"WPT2",4}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
-        { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz",         { {"WGS1",64}, {"WPT1",4}, {"VW2",1}, {"WGS2",64}, {"WPT2",4}, {"VW3",2}, {"WGS3",64}, {"WPT3",4} } },
-        { "default",                                         { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",4}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
-      }
-    },
-    { // Intel GPUs
-      kDeviceTypeGPU, "Intel", {
-        { "Intel(R) HD Graphics Skylake ULT GT2",            { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",256}, {"WPT2",1}, {"VW3",4}, {"WGS3",64}, {"WPT3",4} } },
-        { "Iris",                                            { {"WGS1",64}, {"WPT1",2}, {"VW2",1}, {"WGS2",128}, {"WPT2",2}, {"VW3",4}, {"WGS3",64}, {"WPT3",8} } },
-        { "Iris Pro",                                        { {"WGS1",256}, {"WPT1",2}, {"VW2",1}, {"WGS2",128}, {"WPT2",2}, {"VW3",4}, {"WGS3",64}, {"WPT3",4} } },
-        { "default",                                         { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",128}, {"WPT2",1}, {"VW3",4}, {"WGS3",64}, {"WPT3",4} } },
-      }
-    },
-    { // Intel accelerators
-      kDeviceTypeAccelerator, "Intel", {
-        { "Intel(R) Many Integrated Core Acceleration Card", { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
-        { "default",                                         { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
-      }
-    },
-    { // NVIDIA GPUs
-      kDeviceTypeGPU, "NVIDIA", {
-        { "GRID K520",                                       { {"WGS1",256}, {"WPT1",1}, {"VW2",2}, {"WGS2",256}, {"WPT2",2}, {"VW3",2}, {"WGS3",64}, {"WPT3",2} } },
-        { "GeForce GTX 480",                                 { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",128}, {"WPT2",1}, {"VW3",2}, {"WGS3",64}, {"WPT3",2} } },
-        { "GeForce GTX 680",                                 { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",128}, {"WPT2",1}, {"VW3",2}, {"WGS3",128}, {"WPT3",2} } },
-        { "GeForce GTX 750 Ti",                              { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",256}, {"WPT2",1}, {"VW3",4}, {"WGS3",128}, {"WPT3",4} } },
-        { "GeForce GTX 980",                                 { {"WGS1",128}, {"WPT1",1}, {"VW2",1}, {"WGS2",256}, {"WPT2",1}, {"VW3",1}, {"WGS3",128}, {"WPT3",1} } },
-        { "GeForce GTX TITAN",                               { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",256}, {"WPT2",1}, {"VW3",1}, {"WGS3",256}, {"WPT3",1} } },
-        { "GeForce GTX TITAN X",                             { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",128}, {"WPT3",1} } },
-        { "Tesla K20m",                                      { {"WGS1",128}, {"WPT1",1}, {"VW2",1}, {"WGS2",256}, {"WPT2",1}, {"VW3",1}, {"WGS3",256}, {"WPT3",1} } },
-        { "Tesla K40m",                                      { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",256}, {"WPT2",1}, {"VW3",1}, {"WGS3",128}, {"WPT3",1} } },
-        { "default",                                         { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
-      }
-    },
-    { // Default
-      kDeviceTypeAll, "default", {
-        { "default",                                         { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
-      }
-    },
-  }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry Database::XgemvComplexSingle = {
-  "Xgemv", Precision::kComplexSingle, {
-    { // AMD GPUs
-      kDeviceTypeGPU, "AMD", {
-        { "AMD Radeon R9 M370X Compute Engine",              { {"WGS1",64}, {"WPT1",1}, {"VW2",2}, {"WGS2",256}, {"WPT2",2}, {"VW3",1}, {"WGS3",128}, {"WPT3",1} } },
-        { "Hawaii",                                          { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
-        { "Pitcairn",                                        { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",2}, {"WGS3",64}, {"WPT3",2} } },
-        { "Tahiti",                                          { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",128}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
-        { "default",                                         { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
-      }
-    },
-    { // Intel CPUs
-      kDeviceTypeCPU, "Intel", {
-        { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz",        { {"WGS1",128}, {"WPT1",1}, {"VW2",1}, {"WGS2",128}, {"WPT2",2}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
-        { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz",         { {"WGS1",64}, {"WPT1",4}, {"VW2",4}, {"WGS2",64}, {"WPT2",4}, {"VW3",2}, {"WGS3",64}, {"WPT3",2} } },
-        { "default",                                         { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",2}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
-      }
-    },
-    { // Intel GPUs
-      kDeviceTypeGPU, "Intel", {
-        { "Intel(R) HD Graphics Skylake ULT GT2",            { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",4}, {"WGS3",64}, {"WPT3",4} } },
-        { "Iris",                                            { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
-        { "Iris Pro",                                        { {"WGS1",64}, {"WPT1",1}, {"VW2",4}, {"WGS2",128}, {"WPT2",4}, {"VW3",2}, {"WGS3",64}, {"WPT3",2} } },
-        { "default",                                         { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
-      }
-    },
-    { // Intel accelerators
-      kDeviceTypeAccelerator, "Intel", {
-        { "Intel(R) Many Integrated Core Acceleration Card", { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
-        { "default",                                         { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
-      }
-    },
-    { // NVIDIA GPUs
-      kDeviceTypeGPU, "NVIDIA", {
-        { "GRID K520",                                       { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",256}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
-        { "GeForce GTX 480",                                 { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
-        { "GeForce GTX 680",                                 { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
-        { "GeForce GTX 750 Ti",                              { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1} } },
-        { "GeForce GTX TITAN",                               { {"WGS1",256}, {"WPT1",1} } },
-        { "default",                                         { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
-      }
-    },
-    { // Default
-      kDeviceTypeAll, "default", {
-        { "default",                                         { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
-      }
-    },
-  }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry Database::XgemvDouble = {
-  "Xgemv", Precision::kDouble, {
-    { // AMD GPUs
-      kDeviceTypeGPU, "AMD", {
-        { "AMD Radeon R9 M370X Compute Engine",              { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",256}, {"WPT2",1}, {"VW3",1}, {"WGS3",128}, {"WPT3",1} } },
-        { "Hawaii",                                          { {"WGS1",128}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
-        { "Pitcairn",                                        { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",2}, {"WGS3",64}, {"WPT3",2} } },
-        { "Tahiti",                                          { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",2}, {"WGS3",64}, {"WPT3",2} } },
-        { "default",                                         { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
-      }
-    },
-    { // Intel CPUs
-      kDeviceTypeCPU, "Intel", {
-        { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz",        { {"WGS1",64}, {"WPT1",2}, {"VW2",4}, {"WGS2",128}, {"WPT2",4}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
-        { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz",         { {"WGS1",64}, {"WPT1",4}, {"VW2",1}, {"WGS2",64}, {"WPT2",4}, {"VW3",1}, {"WGS3",64}, {"WPT3",2} } },
-        { "default",                                         { {"WGS1",64}, {"WPT1",2}, {"VW2",1}, {"WGS2",64}, {"WPT2",4}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
-      }
-    },
-    { // Intel accelerators
-      kDeviceTypeAccelerator, "Intel", {
-        { "Intel(R) Many Integrated Core Acceleration Card", { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
-        { "default",                                         { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
-      }
-    },
-    { // NVIDIA GPUs
-      kDeviceTypeGPU, "NVIDIA", {
-        { "GRID K520",                                       { {"WGS1",128}, {"WPT1",1}, {"VW2",1}, {"WGS2",256}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
-        { "GeForce GTX 480",                                 { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
-        { "GeForce GTX 680",                                 { {"WGS1",128}, {"WPT1",1}, {"VW2",1}, {"WGS2",128}, {"WPT2",1}, {"VW3",2}, {"WGS3",128}, {"WPT3",2} } },
-        { "GeForce GTX 750 Ti",                              { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",256}, {"WPT2",1}, {"VW3",2}, {"WGS3",256}, {"WPT3",2} } },
-        { "GeForce GTX 980",                                 { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
-        { "GeForce GTX TITAN",                               { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",256}, {"WPT2",1}, {"VW3",1}, {"WGS3",256}, {"WPT3",1} } },
-        { "GeForce GTX TITAN X",                             { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",128}, {"WPT2",1}, {"VW3",1}, {"WGS3",128}, {"WPT3",1} } },
-        { "Tesla K20m",                                      { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",128}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
-        { "Tesla K40m",                                      { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",256}, {"WPT2",1}, {"VW3",1}, {"WGS3",256}, {"WPT3",1} } },
-        { "default",                                         { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
-      }
-    },
-    { // Default
-      kDeviceTypeAll, "default", {
-        { "default",                                         { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
-      }
-    },
-  }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry Database::XgemvComplexDouble = {
-  "Xgemv", Precision::kComplexDouble, {
-    { // AMD GPUs
-      kDeviceTypeGPU, "AMD", {
-        { "AMD Radeon R9 M370X Compute Engine",              { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",256}, {"WPT2",1}, {"VW3",1}, {"WGS3",128}, {"WPT3",1} } },
-        { "Hawaii",                                          { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
-        { "Pitcairn",                                        { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
-        { "Tahiti",                                          { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
-        { "default",                                         { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
-      }
-    },
-    { // Intel CPUs
-      kDeviceTypeCPU, "Intel", {
-        { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz",        { {"WGS1",64}, {"WPT1",1}, {"VW2",2}, {"WGS2",64}, {"WPT2",4}, {"VW3",1}, {"WGS3",256}, {"WPT3",1} } },
-        { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz",         { {"WGS1",64}, {"WPT1",4}, {"VW2",4}, {"WGS2",64}, {"WPT2",4}, {"VW3",2}, {"WGS3",256}, {"WPT3",2} } },
-        { "default",                                         { {"WGS1",64}, {"WPT1",1}, {"VW2",2}, {"WGS2",64}, {"WPT2",4}, {"VW3",1}, {"WGS3",256}, {"WPT3",1} } },
-      }
-    },
-    { // Intel accelerators
-      kDeviceTypeAccelerator, "Intel", {
-        { "Intel(R) Many Integrated Core Acceleration Card", { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
-        { "default",                                         { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
-      }
-    },
-    { // NVIDIA GPUs
-      kDeviceTypeGPU, "NVIDIA", {
-        { "GRID K520",                                       { {"WGS1",128}, {"WPT1",1}, {"VW2",1}, {"WGS2",128}, {"WPT2",1}, {"VW3",1}, {"WGS3",256}, {"WPT3",1} } },
-        { "GeForce GTX 480",                                 { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
-        { "default",                                         { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
-      }
-    },
-    { // Default
-      kDeviceTypeAll, "default", {
-        { "default",                                         { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } },
-      }
-    },
-  }
-};
-
-// =================================================================================================
-} // namespace clblast
diff --git a/include/internal/database/xger.h b/include/internal/database/xger.h
deleted file mode 100644
index f2e0a36f..00000000
--- a/include/internal/database/xger.h
+++ /dev/null
@@ -1,220 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-//   Database generator <database.py>
-//
-// This file populates the database with best-found tuning parameters for the 'Xger' kernels.
-//
-// =================================================================================================
-
-namespace clblast {
-// =================================================================================================
-
-const Database::DatabaseEntry Database::XgerHalf = {
-  "Xger", Precision::kHalf, {
-    { // Intel GPUs
-      kDeviceTypeGPU, "Intel", {
-        { "Intel(R) HD Graphics Skylake ULT GT2",            { {"WGS1",64}, {"WGS2",1}, {"WPT",1} } },
-        { "default",                                         { {"WGS1",64}, {"WGS2",1}, {"WPT",1} } },
-      }
-    },
-    { // Default
-      kDeviceTypeAll, "default", {
-        { "default",                                         { {"WGS1",64}, {"WGS2",1}, {"WPT",1} } },
-      }
-    },
-  }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry Database::XgerSingle = {
-  "Xger", Precision::kSingle, {
-    { // AMD GPUs
-      kDeviceTypeGPU, "AMD", {
-        { "AMD Radeon R9 M370X Compute Engine",              { {"WGS1",256}, {"WGS2",1}, {"WPT",1} } },
-        { "Hawaii",                                          { {"WGS1",64}, {"WGS2",2}, {"WPT",1} } },
-        { "Pitcairn",                                        { {"WGS1",64}, {"WGS2",1}, {"WPT",1} } },
-        { "Tahiti",                                          { {"WGS1",256}, {"WGS2",1}, {"WPT",1} } },
-        { "default",                                         { {"WGS1",64}, {"WGS2",1}, {"WPT",1} } },
-      }
-    },
-    { // ARM GPUs
-      kDeviceTypeGPU, "ARM", {
-        { "Mali-T628",                                       { {"WGS1",64}, {"WGS2",4}, {"WPT",4} } },
-        { "default",                                         { {"WGS1",64}, {"WGS2",4}, {"WPT",4} } },
-      }
-    },
-    { // Intel CPUs
-      kDeviceTypeCPU, "Intel", {
-        { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz",        { {"WGS1",128}, {"WGS2",2}, {"WPT",4} } },
-        { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz",        { {"WGS1",128}, {"WGS2",1}, {"WPT",4} } },
-        { "default",                                         { {"WGS1",128}, {"WGS2",1}, {"WPT",4} } },
-      }
-    },
-    { // Intel GPUs
-      kDeviceTypeGPU, "Intel", {
-        { "Intel(R) HD Graphics Skylake ULT GT2",            { {"WGS1",8}, {"WGS2",8}, {"WPT",4} } },
-        { "Iris Pro",                                        { {"WGS1",64}, {"WGS2",1}, {"WPT",4} } },
-        { "default",                                         { {"WGS1",8}, {"WGS2",1}, {"WPT",4} } },
-      }
-    },
-    { // NVIDIA GPUs
-      kDeviceTypeGPU, "NVIDIA", {
-        { "GRID K520",                                       { {"WGS1",128}, {"WGS2",1}, {"WPT",2} } },
-        { "GeForce GTX 480",                                 { {"WGS1",256}, {"WGS2",1}, {"WPT",4} } },
-        { "GeForce GTX 680",                                 { {"WGS1",128}, {"WGS2",1}, {"WPT",4} } },
-        { "GeForce GTX TITAN",                               { {"WGS1",32}, {"WGS2",4}, {"WPT",2} } },
-        { "default",                                         { {"WGS1",32}, {"WGS2",1}, {"WPT",2} } },
-      }
-    },
-    { // Default
-      kDeviceTypeAll, "default", {
-        { "default",                                         { {"WGS1",8}, {"WGS2",1}, {"WPT",1} } },
-      }
-    },
-  }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry Database::XgerComplexSingle = {
-  "Xger", Precision::kComplexSingle, {
-    { // AMD GPUs
-      kDeviceTypeGPU, "AMD", {
-        { "AMD Radeon R9 M370X Compute Engine",              { {"WGS1",64}, {"WGS2",4}, {"WPT",1} } },
-        { "Hawaii",                                          { {"WGS1",64}, {"WGS2",1}, {"WPT",2} } },
-        { "Pitcairn",                                        { {"WGS1",128}, {"WGS2",2}, {"WPT",1} } },
-        { "Tahiti",                                          { {"WGS1",64}, {"WGS2",2}, {"WPT",1} } },
-        { "default",                                         { {"WGS1",64}, {"WGS2",1}, {"WPT",1} } },
-      }
-    },
-    { // ARM GPUs
-      kDeviceTypeGPU, "ARM", {
-        { "Mali-T628",                                       { {"WGS1",128}, {"WGS2",1}, {"WPT",1} } },
-        { "default",                                         { {"WGS1",128}, {"WGS2",1}, {"WPT",1} } },
-      }
-    },
-    { // Intel CPUs
-      kDeviceTypeCPU, "Intel", {
-        { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz",        { {"WGS1",256}, {"WGS2",1}, {"WPT",4} } },
-        { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz",        { {"WGS1",512}, {"WGS2",4}, {"WPT",2} } },
-        { "default",                                         { {"WGS1",256}, {"WGS2",1}, {"WPT",2} } },
-      }
-    },
-    { // Intel GPUs
-      kDeviceTypeGPU, "Intel", {
-        { "Intel(R) HD Graphics Skylake ULT GT2",            { {"WGS1",128}, {"WGS2",4}, {"WPT",2} } },
-        { "Iris Pro",                                        { {"WGS1",16}, {"WGS2",2}, {"WPT",4} } },
-        { "default",                                         { {"WGS1",16}, {"WGS2",2}, {"WPT",2} } },
-      }
-    },
-    { // NVIDIA GPUs
-      kDeviceTypeGPU, "NVIDIA", {
-        { "GRID K520",                                       { {"WGS1",64}, {"WGS2",4}, {"WPT",2} } },
-        { "GeForce GTX 480",                                 { {"WGS1",128}, {"WGS2",2}, {"WPT",2} } },
-        { "GeForce GTX 680",                                 { {"WGS1",32}, {"WGS2",4}, {"WPT",2} } },
-        { "GeForce GTX TITAN",                               { {"WGS1",16}, {"WGS2",16}, {"WPT",2} } },
-        { "default",                                         { {"WGS1",16}, {"WGS2",2}, {"WPT",2} } },
-      }
-    },
-    { // Default
-      kDeviceTypeAll, "default", {
-        { "default",                                         { {"WGS1",16}, {"WGS2",1}, {"WPT",1} } },
-      }
-    },
-  }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry Database::XgerDouble = {
-  "Xger", Precision::kDouble, {
-    { // AMD GPUs
-      kDeviceTypeGPU, "AMD", {
-        { "AMD Radeon R9 M370X Compute Engine",              { {"WGS1",32}, {"WGS2",4}, {"WPT",1} } },
-        { "Hawaii",                                          { {"WGS1",32}, {"WGS2",4}, {"WPT",2} } },
-        { "Pitcairn",                                        { {"WGS1",64}, {"WGS2",1}, {"WPT",1} } },
-        { "Tahiti",                                          { {"WGS1",64}, {"WGS2",2}, {"WPT",1} } },
-        { "default",                                         { {"WGS1",32}, {"WGS2",1}, {"WPT",1} } },
-      }
-    },
-    { // ARM GPUs
-      kDeviceTypeGPU, "ARM", {
-        { "Mali-T628",                                       { {"WGS1",64}, {"WGS2",4}, {"WPT",1} } },
-        { "default",                                         { {"WGS1",64}, {"WGS2",4}, {"WPT",1} } },
-      }
-    },
-    { // Intel CPUs
-      kDeviceTypeCPU, "Intel", {
-        { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz",        { {"WGS1",512}, {"WGS2",16}, {"WPT",1} } },
-        { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz",        { {"WGS1",512}, {"WGS2",8}, {"WPT",2} } },
-        { "default",                                         { {"WGS1",512}, {"WGS2",8}, {"WPT",1} } },
-      }
-    },
-    { // NVIDIA GPUs
-      kDeviceTypeGPU, "NVIDIA", {
-        { "GRID K520",                                       { {"WGS1",128}, {"WGS2",8}, {"WPT",2} } },
-        { "GeForce GTX 480",                                 { {"WGS1",32}, {"WGS2",4}, {"WPT",2} } },
-        { "GeForce GTX 680",                                 { {"WGS1",128}, {"WGS2",4}, {"WPT",2} } },
-        { "GeForce GTX TITAN",                               { {"WGS1",16}, {"WGS2",8}, {"WPT",2} } },
-        { "default",                                         { {"WGS1",16}, {"WGS2",4}, {"WPT",2} } },
-      }
-    },
-    { // Default
-      kDeviceTypeAll, "default", {
-        { "default",                                         { {"WGS1",16}, {"WGS2",1}, {"WPT",1} } },
-      }
-    },
-  }
-};
-
-// =================================================================================================
-
-const Database::DatabaseEntry Database::XgerComplexDouble = {
-  "Xger", Precision::kComplexDouble, {
-    { // AMD GPUs
-      kDeviceTypeGPU, "AMD", {
-        { "AMD Radeon R9 M370X Compute Engine",              { {"WGS1",64}, {"WGS2",1}, {"WPT",1} } },
-        { "Hawaii",                                          { {"WGS1",128}, {"WGS2",1}, {"WPT",1} } },
-        { "Pitcairn",                                        { {"WGS1",64}, {"WGS2",4}, {"WPT",1} } },
-        { "Tahiti",                                          { {"WGS1",32}, {"WGS2",4}, {"WPT",1} } },
-        { "default",                                         { {"WGS1",32}, {"WGS2",1}, {"WPT",1} } },
-      }
-    },
-    { // ARM GPUs
-      kDeviceTypeGPU, "ARM", {
-        { "Mali-T628",                                       { {"WGS1",64}, {"WGS2",2}, {"WPT",4} } },
-        { "default",                                         { {"WGS1",64}, {"WGS2",2}, {"WPT",4} } },
-      }
-    },
-    { // Intel CPUs
-      kDeviceTypeCPU, "Intel", {
-        { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz",        { {"WGS1",512}, {"WGS2",4}, {"WPT",2} } },
-        { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz",        { {"WGS1",256}, {"WGS2",1}, {"WPT",2} } },
-        { "default",                                         { {"WGS1",256}, {"WGS2",1}, {"WPT",2} } },
-      }
-    },
-    { // NVIDIA GPUs
-      kDeviceTypeGPU, "NVIDIA", {
-        { "GRID K520",                                       { {"WGS1",16}, {"WGS2",8}, {"WPT",2} } },
-        { "GeForce GTX 480",                                 { {"WGS1",64}, {"WGS2",2}, {"WPT",2} } },
-        { "GeForce GTX 680",                                 { {"WGS1",8}, {"WGS2",16}, {"WPT",1} } },
-        { "GeForce GTX TITAN",                               { {"WGS1",32}, {"WGS2",4}, {"WPT",2} } },
-        { "default",                                         { {"WGS1",8}, {"WGS2",2}, {"WPT",1} } },
-      }
-    },
-    { // Default
-      kDeviceTypeAll, "default", {
-        { "default",                                         { {"WGS1",8}, {"WGS2",1}, {"WPT",1} } },
-      }
-    },
-  }
-};
-
-// =================================================================================================
-} // namespace clblast
diff --git a/include/internal/public_api.h b/include/internal/public_api.h
deleted file mode 100644
index d0732297..00000000
--- a/include/internal/public_api.h
+++ /dev/null
@@ -1,34 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-//   Cedric Nugteren <www.cedricnugteren.nl>
-//
-// This file provides macro's to define the public API. This is needed when building a Windows DLL.
-// Note: this is only used for the C++ interface, the C interface has its own definition included in
-// the header file itself.
-//
-// =================================================================================================
-
-#ifndef CLBLAST_PUBLIC_API_H_
-#define CLBLAST_PUBLIC_API_H_
-
-namespace clblast {
-// =================================================================================================
-
-// Exports library functions under Windows when building a DLL. See also:
-// https://msdn.microsoft.com/en-us/library/a90k134d.aspx
-#ifdef _WIN32
-  #define PUBLIC_API __declspec(dllexport)
-#else
-  #define PUBLIC_API
-#endif
-
-// =================================================================================================
-} // namespace clblast
-
-// CLBLAST_PUBLIC_API_H_
-#endif
diff --git a/include/internal/routine.h b/include/internal/routine.h
deleted file mode 100644
index a6a59d77..00000000
--- a/include/internal/routine.h
+++ /dev/null
@@ -1,68 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-//   Cedric Nugteren <www.cedricnugteren.nl>
-//
-// This file implements all the basic functionality for the BLAS routines. This class serves as a
-// base class for the actual routines (e.g. Xaxpy, Xgemm). It contains common functionality such as
-// compiling the OpenCL kernel, connecting to the database, etc.
-//
-// =================================================================================================
-
-#ifndef CLBLAST_ROUTINE_H_
-#define CLBLAST_ROUTINE_H_
-
-#include <string>
-#include <vector>
-
-#include "internal/cache.h"
-#include "internal/utilities.h"
-#include "internal/database.h"
-#include "internal/buffer_test.h"
-#include "internal/routines/common.h"
-
-namespace clblast {
-// =================================================================================================
-
-// See comment at top of file for a description of the class
-class Routine {
- public:
-
-  // Base class constructor
-  explicit Routine(Queue &queue, EventPointer event, const std::string &name,
-                   const std::vector<std::string> &routines, const Precision precision);
-
-  // Set-up phase of the kernel
-  StatusCode SetUp();
-
- protected:
-
-  // Non-static variable for the precision
-  const Precision precision_;
-
-  // The routine's name and its kernel-source in string form
-  const std::string routine_name_;
-  std::string source_string_;
-
-  // The OpenCL objects, accessible only from derived classes
-  Queue queue_;
-  EventPointer event_;
-  const Context context_;
-  const Device device_;
-
-  // OpenCL device properties
-  const std::string device_name_;
-
-  // Connection to the database for all the device-specific parameters
-  const Database db_;
-};
-
-// =================================================================================================
-} // namespace clblast
-
-// CLBLAST_ROUTINE_H_
-#endif
diff --git a/include/internal/routines/common.h b/include/internal/routines/common.h
deleted file mode 100644
index 308785bd..00000000
--- a/include/internal/routines/common.h
+++ /dev/null
@@ -1,173 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-//   Cedric Nugteren <www.cedricnugteren.nl>
-//
-// This file contains all the interfaces to common kernels, such as copying, padding, and
-// transposing a matrix. These functions are templated and thus header-only. This file also contains
-// other common functions to routines, such as a function to launch a kernel.
-//
-// =================================================================================================
-
-#ifndef CLBLAST_ROUTINES_COMMON_H_
-#define CLBLAST_ROUTINES_COMMON_H_
-
-#include <string>
-#include <vector>
-
-#include "clblast.h"
-#include "internal/clpp11.h"
-#include "internal/database.h"
-
-namespace clblast {
-// =================================================================================================
-
-// Enqueues a kernel, waits for completion, and checks for errors
-StatusCode RunKernel(Kernel &kernel, Queue &queue, const Device &device,
-                     std::vector<size_t> global, const std::vector<size_t> &local,
-                     EventPointer event, std::vector<Event>& waitForEvents);
-
-// As above, but without an event waiting list
-StatusCode RunKernel(Kernel &kernel, Queue &queue, const Device &device,
-                     std::vector<size_t> global, const std::vector<size_t> &local,
-                     EventPointer event);
-
-// =================================================================================================
-
-// Copies or transposes a matrix and optionally pads/unpads it with zeros. This method is also able
-// to write to symmetric and triangular matrices through optional arguments.
-template <typename T>
-StatusCode PadCopyTransposeMatrix(Queue &queue, const Device &device, const Context &context,
-                                  const Database &db,
-                                  EventPointer event, std::vector<Event>& waitForEvents,
-                                  const size_t src_one, const size_t src_two,
-                                  const size_t src_ld, const size_t src_offset,
-                                  const Buffer<T> &src,
-                                  const size_t dest_one, const size_t dest_two,
-                                  const size_t dest_ld, const size_t dest_offset,
-                                  const Buffer<T> &dest,
-                                  const T alpha,
-                                  const Program &program, const bool do_pad,
-                                  const bool do_transpose, const bool do_conjugate,
-                                  const bool upper = false, const bool lower = false,
-                                  const bool diagonal_imag_zero = false) {
-
-  // Determines whether or not the fast-version could potentially be used
-  auto use_fast_kernel = (src_offset == 0) && (dest_offset == 0) && (do_conjugate == false) &&
-                         (src_one == dest_one) && (src_two == dest_two) && (src_ld == dest_ld) &&
-                         (upper == false) && (lower == false) && (diagonal_imag_zero == false);
-
-  // Determines the right kernel
-  auto kernel_name = std::string{};
-  if (do_transpose) {
-    if (use_fast_kernel &&
-        IsMultiple(src_ld, db["TRA_WPT"]) &&
-        IsMultiple(src_one, db["TRA_WPT"]*db["TRA_WPT"]) &&
-        IsMultiple(src_two, db["TRA_WPT"]*db["TRA_WPT"])) {
-      kernel_name = "TransposeMatrixFast";
-    }
-    else {
-      use_fast_kernel = false;
-      kernel_name = (do_pad) ? "TransposePadMatrix" : "TransposeMatrix";
-    }
-  }
-  else {
-    if (use_fast_kernel &&
-        IsMultiple(src_ld, db["COPY_VW"]) &&
-        IsMultiple(src_one, db["COPY_VW"]*db["COPY_DIMX"]) &&
-        IsMultiple(src_two, db["COPY_WPT"]*db["COPY_DIMY"])) {
-      kernel_name = "CopyMatrixFast";
-    }
-    else {
-      use_fast_kernel = false;
-      kernel_name = (do_pad) ? "CopyPadMatrix" : "CopyMatrix";
-    }
-  }
-
-  // Upload the scalar argument as a constant buffer to the device (needed for half-precision)
-  auto alpha_buffer = Buffer<T>(context, 1);
-  alpha_buffer.Write(queue, 1, &alpha);
-
-  // Retrieves the kernel from the compiled binary
-  try {
-    auto kernel = Kernel(program, kernel_name);
-
-    // Sets the kernel arguments
-    if (use_fast_kernel) {
-      kernel.SetArgument(0, static_cast<int>(src_ld));
-      kernel.SetArgument(1, src());
-      kernel.SetArgument(2, dest());
-      kernel.SetArgument(3, alpha_buffer());
-    }
-    else {
-      kernel.SetArgument(0, static_cast<int>(src_one));
-      kernel.SetArgument(1, static_cast<int>(src_two));
-      kernel.SetArgument(2, static_cast<int>(src_ld));
-      kernel.SetArgument(3, static_cast<int>(src_offset));
-      kernel.SetArgument(4, src());
-      kernel.SetArgument(5, static_cast<int>(dest_one));
-      kernel.SetArgument(6, static_cast<int>(dest_two));
-      kernel.SetArgument(7, static_cast<int>(dest_ld));
-      kernel.SetArgument(8, static_cast<int>(dest_offset));
-      kernel.SetArgument(9, dest());
-      kernel.SetArgument(10, alpha_buffer());
-      if (do_pad) {
-        kernel.SetArgument(11, static_cast<int>(do_conjugate));
-      }
-      else {
-        kernel.SetArgument(11, static_cast<int>(upper));
-        kernel.SetArgument(12, static_cast<int>(lower));
-        kernel.SetArgument(13, static_cast<int>(diagonal_imag_zero));
-      }
-    }
-
-    // Launches the kernel and returns the error code. Uses global and local thread sizes based on
-    // parameters in the database.
-    if (do_transpose) {
-      if (use_fast_kernel) {
-        const auto global = std::vector<size_t>{
-          dest_one / db["TRA_WPT"],
-          dest_two / db["TRA_WPT"]
-        };
-        const auto local = std::vector<size_t>{db["TRA_DIM"], db["TRA_DIM"]};
-        return RunKernel(kernel, queue, device, global, local, event, waitForEvents);
-      }
-      else {
-        const auto global = std::vector<size_t>{
-          Ceil(CeilDiv(dest_one, db["PADTRA_WPT"]), db["PADTRA_TILE"]),
-          Ceil(CeilDiv(dest_two, db["PADTRA_WPT"]), db["PADTRA_TILE"])
-        };
-        const auto local = std::vector<size_t>{db["PADTRA_TILE"], db["PADTRA_TILE"]};
-        return RunKernel(kernel, queue, device, global, local, event, waitForEvents);
-      }
-    }
-    else {
-      if (use_fast_kernel) {
-        const auto global = std::vector<size_t>{
-          dest_one / db["COPY_VW"],
-          dest_two / db["COPY_WPT"]
-        };
-        const auto local = std::vector<size_t>{db["COPY_DIMX"], db["COPY_DIMY"]};
-        return RunKernel(kernel, queue, device, global, local, event, waitForEvents);
-      }
-      else {
-        const auto global = std::vector<size_t>{
-          Ceil(CeilDiv(dest_one, db["PAD_WPTX"]), db["PAD_DIMX"]),
-          Ceil(CeilDiv(dest_two, db["PAD_WPTY"]), db["PAD_DIMY"])
-        };
-        const auto local = std::vector<size_t>{db["PAD_DIMX"], db["PAD_DIMY"]};
-        return RunKernel(kernel, queue, device, global, local, event, waitForEvents);
-      }
-    }
-  } catch (...) { return StatusCode::kInvalidKernel; }
-}
-
-// =================================================================================================
-} // namespace clblast
-
-// CLBLAST_ROUTINES_COMMON_H_
-#endif
diff --git a/include/internal/routines/level1/xamax.h b/include/internal/routines/level1/xamax.h
deleted file mode 100644
index 42f8f67c..00000000
--- a/include/internal/routines/level1/xamax.h
+++ /dev/null
@@ -1,40 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-//   Cedric Nugteren <www.cedricnugteren.nl>
-//
-// This file implements the Xamax routine. The precision is implemented using a template argument.
-//
-// =================================================================================================
-
-#ifndef CLBLAST_ROUTINES_XAMAX_H_
-#define CLBLAST_ROUTINES_XAMAX_H_
-
-#include "internal/routine.h"
-
-namespace clblast {
-// =================================================================================================
-
-// See comment at top of file for a description of the class
-template <typename T>
-class Xamax: public Routine {
- public:
-
-  // Constructor
-  Xamax(Queue &queue, EventPointer event, const std::string &name = "AMAX");
-
-  // Templated-precision implementation of the routine
-  StatusCode DoAmax(const size_t n,
-                    const Buffer<unsigned int> &imax_buffer, const size_t imax_offset,
-                    const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc);
-};
-
-// =================================================================================================
-} // namespace clblast
-
-// CLBLAST_ROUTINES_XAMAX_H_
-#endif
diff --git a/include/internal/routines/level1/xasum.h b/include/internal/routines/level1/xasum.h
deleted file mode 100644
index 9d93a6f4..00000000
--- a/include/internal/routines/level1/xasum.h
+++ /dev/null
@@ -1,40 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-//   Cedric Nugteren <www.cedricnugteren.nl>
-//
-// This file implements the Xasum routine. The precision is implemented using a template argument.
-//
-// =================================================================================================
-
-#ifndef CLBLAST_ROUTINES_XASUM_H_
-#define CLBLAST_ROUTINES_XASUM_H_
-
-#include "internal/routine.h"
-
-namespace clblast {
-// =================================================================================================
-
-// See comment at top of file for a description of the class
-template <typename T>
-class Xasum: public Routine {
- public:
-
-  // Constructor
-  Xasum(Queue &queue, EventPointer event, const std::string &name = "ASUM");
-
-  // Templated-precision implementation of the routine
-  StatusCode DoAsum(const size_t n,
-                    const Buffer<T> &asum_buffer, const size_t asum_offset,
-                    const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc);
-};
-
-// =================================================================================================
-} // namespace clblast
-
-// CLBLAST_ROUTINES_XASUM_H_
-#endif
diff --git a/include/internal/routines/level1/xaxpy.h b/include/internal/routines/level1/xaxpy.h
deleted file mode 100644
index 4c8d2c1f..00000000
--- a/include/internal/routines/level1/xaxpy.h
+++ /dev/null
@@ -1,40 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-//   Cedric Nugteren <www.cedricnugteren.nl>
-//
-// This file implements the Xaxpy routine. The precision is implemented using a template argument.
-//
-// =================================================================================================
-
-#ifndef CLBLAST_ROUTINES_XAXPY_H_
-#define CLBLAST_ROUTINES_XAXPY_H_
-
-#include "internal/routine.h"
-
-namespace clblast {
-// =================================================================================================
-
-// See comment at top of file for a description of the class
-template <typename T>
-class Xaxpy: public Routine {
- public:
-
-  // Constructor
-  Xaxpy(Queue &queue, EventPointer event, const std::string &name = "AXPY");
-
-  // Templated-precision implementation of the routine
-  StatusCode DoAxpy(const size_t n, const T alpha,
-                    const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
-                    const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc);
-};
-
-// =================================================================================================
-} // namespace clblast
-
-// CLBLAST_ROUTINES_XAXPY_H_
-#endif
diff --git a/include/internal/routines/level1/xcopy.h b/include/internal/routines/level1/xcopy.h
deleted file mode 100644
index c7d03dd0..00000000
--- a/include/internal/routines/level1/xcopy.h
+++ /dev/null
@@ -1,40 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-//   Cedric Nugteren <www.cedricnugteren.nl>
-//
-// This file implements the Xcopy routine. The precision is implemented using a template argument.
-//
-// =================================================================================================
-
-#ifndef CLBLAST_ROUTINES_XCOPY_H_
-#define CLBLAST_ROUTINES_XCOPY_H_
-
-#include "internal/routine.h"
-
-namespace clblast {
-// =================================================================================================
-
-// See comment at top of file for a description of the class
-template <typename T>
-class Xcopy: public Routine {
- public:
-
-  // Constructor
-  Xcopy(Queue &queue, EventPointer event, const std::string &name = "COPY");
-
-  // Templated-precision implementation of the routine
-  StatusCode DoCopy(const size_t n,
-                    const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
-                    const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc);
-};
-
-// =================================================================================================
-} // namespace clblast
-
-// CLBLAST_ROUTINES_XCOPY_H_
-#endif
diff --git a/include/internal/routines/level1/xdot.h b/include/internal/routines/level1/xdot.h
deleted file mode 100644
index e1968740..00000000
--- a/include/internal/routines/level1/xdot.h
+++ /dev/null
@@ -1,42 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-//   Cedric Nugteren <www.cedricnugteren.nl>
-//
-// This file implements the Xdot routine. The precision is implemented using a template argument.
-//
-// =================================================================================================
-
-#ifndef CLBLAST_ROUTINES_XDOT_H_
-#define CLBLAST_ROUTINES_XDOT_H_
-
-#include "internal/routine.h"
-
-namespace clblast {
-// =================================================================================================
-
-// See comment at top of file for a description of the class
-template <typename T>
-class Xdot: public Routine {
- public:
-
-  // Constructor
-  Xdot(Queue &queue, EventPointer event, const std::string &name = "DOT");
-
-  // Templated-precision implementation of the routine
-  StatusCode DoDot(const size_t n,
-                   const Buffer<T> &dot_buffer, const size_t dot_offset,
-                   const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
-                   const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
-                   const bool do_conjugate = false);
-};
-
-// =================================================================================================
-} // namespace clblast
-
-// CLBLAST_ROUTINES_XDOT_H_
-#endif
diff --git a/include/internal/routines/level1/xdotc.h b/include/internal/routines/level1/xdotc.h
deleted file mode 100644
index 0dc2cfe9..00000000
--- a/include/internal/routines/level1/xdotc.h
+++ /dev/null
@@ -1,44 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-//   Cedric Nugteren <www.cedricnugteren.nl>
-//
-// This file implements the Xdotc routine. The precision is implemented using a template argument.
-//
-// =================================================================================================
-
-#ifndef CLBLAST_ROUTINES_XDOTC_H_
-#define CLBLAST_ROUTINES_XDOTC_H_
-
-#include "internal/routines/level1/xdot.h"
-
-namespace clblast {
-// =================================================================================================
-
-// See comment at top of file for a description of the class
-template <typename T>
-class Xdotc: public Xdot<T> {
- public:
-
-  // Uses the regular Xdot routine
-  using Xdot<T>::DoDot;
-
-  // Constructor
-  Xdotc(Queue &queue, EventPointer event, const std::string &name = "DOTC");
-
-  // Templated-precision implementation of the routine
-  StatusCode DoDotc(const size_t n,
-                    const Buffer<T> &dot_buffer, const size_t dot_offset,
-                    const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
-                    const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc);
-};
-
-// =================================================================================================
-} // namespace clblast
-
-// CLBLAST_ROUTINES_XDOTC_H_
-#endif
diff --git a/include/internal/routines/level1/xdotu.h b/include/internal/routines/level1/xdotu.h
deleted file mode 100644
index 98988744..00000000
--- a/include/internal/routines/level1/xdotu.h
+++ /dev/null
@@ -1,44 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-//   Cedric Nugteren <www.cedricnugteren.nl>
-//
-// This file implements the Xdotu routine. The precision is implemented using a template argument.
-//
-// =================================================================================================
-
-#ifndef CLBLAST_ROUTINES_XDOTU_H_
-#define CLBLAST_ROUTINES_XDOTU_H_
-
-#include "internal/routines/level1/xdot.h"
-
-namespace clblast {
-// =================================================================================================
-
-// See comment at top of file for a description of the class
-template <typename T>
-class Xdotu: public Xdot<T> {
- public:
-
-  // Uses the regular Xdot routine
-  using Xdot<T>::DoDot;
-
-  // Constructor
-  Xdotu(Queue &queue, EventPointer event, const std::string &name = "DOTU");
-
-  // Templated-precision implementation of the routine
-  StatusCode DoDotu(const size_t n,
-                    const Buffer<T> &dot_buffer, const size_t dot_offset,
-                    const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
-                    const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc);
-};
-
-// =================================================================================================
-} // namespace clblast
-
-// CLBLAST_ROUTINES_XDOTU_H_
-#endif
diff --git a/include/internal/routines/level1/xmax.h b/include/internal/routines/level1/xmax.h
deleted file mode 100644
index a872cede..00000000
--- a/include/internal/routines/level1/xmax.h
+++ /dev/null
@@ -1,49 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-//   Cedric Nugteren <www.cedricnugteren.nl>
-//
-// This file implements the Xmax routine. The precision is implemented using a template argument.
-//
-// =================================================================================================
-
-#ifndef CLBLAST_ROUTINES_XMAX_H_
-#define CLBLAST_ROUTINES_XMAX_H_
-
-#include "internal/routine.h"
-#include "internal/routines/level1/xamax.h"
-
-namespace clblast {
-// =================================================================================================
-
-// See comment at top of file for a description of the class
-template <typename T>
-class Xmax: public Xamax<T> {
- public:
-
-  // Members and methods from the base class
-  using Xamax<T>::DoAmax;
-
-  // Constructor
-  Xmax(Queue &queue, EventPointer event, const std::string &name = "MAX"):
-    Xamax<T>(queue, event, name) {
-  }
-
-  // Forwards to the regular absolute version. The implementation difference is realised in the
-  // kernel through a pre-processor macro based on the name of the routine.
-  StatusCode DoMax(const size_t n,
-                   const Buffer<unsigned int> &imax_buffer, const size_t imax_offset,
-                   const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc) {
-    return DoAmax(n, imax_buffer, imax_offset, x_buffer, x_offset, x_inc);
-  }
-};
-
-// =================================================================================================
-} // namespace clblast
-
-// CLBLAST_ROUTINES_XMAX_H_
-#endif
diff --git a/include/internal/routines/level1/xmin.h b/include/internal/routines/level1/xmin.h
deleted file mode 100644
index 700c81cc..00000000
--- a/include/internal/routines/level1/xmin.h
+++ /dev/null
@@ -1,49 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-//   Cedric Nugteren <www.cedricnugteren.nl>
-//
-// This file implements the Xmin routine. The precision is implemented using a template argument.
-//
-// =================================================================================================
-
-#ifndef CLBLAST_ROUTINES_XMIN_H_
-#define CLBLAST_ROUTINES_XMIN_H_
-
-#include "internal/routine.h"
-#include "internal/routines/level1/xamax.h"
-
-namespace clblast {
-// =================================================================================================
-
-// See comment at top of file for a description of the class
-template <typename T>
-class Xmin: public Xamax<T> {
- public:
-
-  // Members and methods from the base class
-  using Xamax<T>::DoAmax;
-
-  // Constructor
-  Xmin(Queue &queue, EventPointer event, const std::string &name = "MIN"):
-    Xamax<T>(queue, event, name) {
-  }
-
-  // Forwards to the regular max-absolute version. The implementation difference is realised in the
-  // kernel through a pre-processor macro based on the name of the routine.
-  StatusCode DoMin(const size_t n,
-                   const Buffer<unsigned int> &imin_buffer, const size_t imin_offset,
-                   const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc) {
-    return DoAmax(n, imin_buffer, imin_offset, x_buffer, x_offset, x_inc);
-  }
-};
-
-// =================================================================================================
-} // namespace clblast
-
-// CLBLAST_ROUTINES_XMIN_H_
-#endif
diff --git a/include/internal/routines/level1/xnrm2.h b/include/internal/routines/level1/xnrm2.h
deleted file mode 100644
index ca9268c0..00000000
--- a/include/internal/routines/level1/xnrm2.h
+++ /dev/null
@@ -1,40 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-//   Cedric Nugteren <www.cedricnugteren.nl>
-//
-// This file implements the Xnrm2 routine. The precision is implemented using a template argument.
-//
-// =================================================================================================
-
-#ifndef CLBLAST_ROUTINES_XNRM2_H_
-#define CLBLAST_ROUTINES_XNRM2_H_
-
-#include "internal/routine.h"
-
-namespace clblast {
-// =================================================================================================
-
-// See comment at top of file for a description of the class
-template <typename T>
-class Xnrm2: public Routine {
- public:
-
-  // Constructor
-  Xnrm2(Queue &queue, EventPointer event, const std::string &name = "NRM2");
-
-  // Templated-precision implementation of the routine
-  StatusCode DoNrm2(const size_t n,
-                    const Buffer<T> &nrm2_buffer, const size_t nrm2_offset,
-                    const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc);
-};
-
-// =================================================================================================
-} // namespace clblast
-
-// CLBLAST_ROUTINES_XNRM2_H_
-#endif
diff --git a/include/internal/routines/level1/xscal.h b/include/internal/routines/level1/xscal.h
deleted file mode 100644
index b9430f3b..00000000
--- a/include/internal/routines/level1/xscal.h
+++ /dev/null
@@ -1,39 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-//   Cedric Nugteren <www.cedricnugteren.nl>
-//
-// This file implements the Xscal routine. The precision is implemented using a template argument.
-//
-// =================================================================================================
-
-#ifndef CLBLAST_ROUTINES_XSCAL_H_
-#define CLBLAST_ROUTINES_XSCAL_H_
-
-#include "internal/routine.h"
-
-namespace clblast {
-// =================================================================================================
-
-// See comment at top of file for a description of the class
-template <typename T>
-class Xscal: public Routine {
- public:
-
-  // Constructor
-  Xscal(Queue &queue, EventPointer event, const std::string &name = "SCAL");
-
-  // Templated-precision implementation of the routine
-  StatusCode DoScal(const size_t n, const T alpha,
-                    const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc);
-};
-
-// =================================================================================================
-} // namespace clblast
-
-// CLBLAST_ROUTINES_XSCAL_H_
-#endif
diff --git a/include/internal/routines/level1/xsum.h b/include/internal/routines/level1/xsum.h
deleted file mode 100644
index 2f633b52..00000000
--- a/include/internal/routines/level1/xsum.h
+++ /dev/null
@@ -1,49 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-//   Cedric Nugteren <www.cedricnugteren.nl>
-//
-// This file implements the Xsum routine. The precision is implemented using a template argument.
-//
-// =================================================================================================
-
-#ifndef CLBLAST_ROUTINES_XSUM_H_
-#define CLBLAST_ROUTINES_XSUM_H_
-
-#include "internal/routine.h"
-#include "internal/routines/level1/xasum.h"
-
-namespace clblast {
-// =================================================================================================
-
-// See comment at top of file for a description of the class
-template <typename T>
-class Xsum: public Xasum<T> {
- public:
-
-  // Members and methods from the base class
-  using Xasum<T>::DoAsum;
-
-  // Constructor
-  Xsum(Queue &queue, EventPointer event, const std::string &name = "SUM"):
-    Xasum<T>(queue, event, name) {
-  }
-
-  // Forwards to the regular absolute version. The implementation difference is realised in the
-  // kernel through a pre-processor macro based on the name of the routine.
-  StatusCode DoSum(const size_t n,
-                   const Buffer<T> &sum_buffer, const size_t sum_offset,
-                   const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc) {
-    return DoAsum(n, sum_buffer, sum_offset, x_buffer, x_offset, x_inc);
-  }
-};
-
-// =================================================================================================
-} // namespace clblast
-
-// CLBLAST_ROUTINES_XSUM_H_
-#endif
diff --git a/include/internal/routines/level1/xswap.h b/include/internal/routines/level1/xswap.h
deleted file mode 100644
index bd063afc..00000000
--- a/include/internal/routines/level1/xswap.h
+++ /dev/null
@@ -1,40 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-//   Cedric Nugteren <www.cedricnugteren.nl>
-//
-// This file implements the Xswap routine. The precision is implemented using a template argument.
-//
-// =================================================================================================
-
-#ifndef CLBLAST_ROUTINES_XSWAP_H_
-#define CLBLAST_ROUTINES_XSWAP_H_
-
-#include "internal/routine.h"
-
-namespace clblast {
-// =================================================================================================
-
-// See comment at top of file for a description of the class
-template <typename T>
-class Xswap: public Routine {
- public:
-
-  // Constructor
-  Xswap(Queue &queue, EventPointer event, const std::string &name = "SWAP");
-
-  // Templated-precision implementation of the routine
-  StatusCode DoSwap(const size_t n,
-                    const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
-                    const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc);
-};
-
-// =================================================================================================
-} // namespace clblast
-
-// CLBLAST_ROUTINES_XSWAP_H_
-#endif
diff --git a/include/internal/routines/level2/xgbmv.h b/include/internal/routines/level2/xgbmv.h
deleted file mode 100644
index bc94c77d..00000000
--- a/include/internal/routines/level2/xgbmv.h
+++ /dev/null
@@ -1,49 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-//   Cedric Nugteren <www.cedricnugteren.nl>
-//
-// This file implements the Xgbmv routine. It is based on the generalized mat-vec multiplication
-// routine (Xgemv). The Xgbmv class inherits from the templated class Xgemv, allowing it to call the
-// "MatVec" function directly.
-//
-// =================================================================================================
-
-#ifndef CLBLAST_ROUTINES_XGBMV_H_
-#define CLBLAST_ROUTINES_XGBMV_H_
-
-#include "internal/routines/level2/xgemv.h"
-
-namespace clblast {
-// =================================================================================================
-
-// See comment at top of file for a description of the class
-template <typename T>
-class Xgbmv: public Xgemv<T> {
- public:
-
-  // Uses the generic matrix-vector routine
-  using Xgemv<T>::MatVec;
-
-  // Constructor
-  Xgbmv(Queue &queue, EventPointer event, const std::string &name = "GBMV");
-
-  // Templated-precision implementation of the routine
-  StatusCode DoGbmv(const Layout layout, const Transpose a_transpose,
-                    const size_t m, const size_t n, const size_t kl, const size_t ku,
-                    const T alpha,
-                    const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
-                    const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
-                    const T beta,
-                    const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc);
-};
-
-// =================================================================================================
-} // namespace clblast
-
-// CLBLAST_ROUTINES_XGBMV_H_
-#endif
diff --git a/include/internal/routines/level2/xgemv.h b/include/internal/routines/level2/xgemv.h
deleted file mode 100644
index e9804c62..00000000
--- a/include/internal/routines/level2/xgemv.h
+++ /dev/null
@@ -1,56 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-//   Cedric Nugteren <www.cedricnugteren.nl>
-//
-// This file implements the Xgemv routine. The precision is implemented using a template argument.
-//
-// =================================================================================================
-
-#ifndef CLBLAST_ROUTINES_XGEMV_H_
-#define CLBLAST_ROUTINES_XGEMV_H_
-
-#include "internal/routine.h"
-
-namespace clblast {
-// =================================================================================================
-
-// See comment at top of file for a description of the class
-template <typename T>
-class Xgemv: public Routine {
- public:
-
-  // Constructor
-  Xgemv(Queue &queue, EventPointer event, const std::string &name = "GEMV");
-
-  // Templated-precision implementation of the routine
-  StatusCode DoGemv(const Layout layout, const Transpose a_transpose,
-                    const size_t m, const size_t n,
-                    const T alpha,
-                    const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
-                    const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
-                    const T beta,
-                    const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc);
-
-  // Generic version used also for other matrix-vector multiplications
-  StatusCode MatVec(const Layout layout, const Transpose a_transpose,
-                    const size_t m, const size_t n,
-                    const T alpha,
-                    const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
-                    const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
-                    const T beta,
-                    const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
-                    bool fast_kernel, bool fast_kernel_rot,
-                    const size_t parameter, const bool packed,
-                    const size_t kl, const size_t ku);
-};
-
-// =================================================================================================
-} // namespace clblast
-
-// CLBLAST_ROUTINES_XGEMV_H_
-#endif
diff --git a/include/internal/routines/level2/xger.h b/include/internal/routines/level2/xger.h
deleted file mode 100644
index 184f8477..00000000
--- a/include/internal/routines/level2/xger.h
+++ /dev/null
@@ -1,43 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-//   Cedric Nugteren <www.cedricnugteren.nl>
-//
-// This file implements the Xger routine. The precision is implemented using a template argument.
-//
-// =================================================================================================
-
-#ifndef CLBLAST_ROUTINES_XGER_H_
-#define CLBLAST_ROUTINES_XGER_H_
-
-#include "internal/routine.h"
-
-namespace clblast {
-// =================================================================================================
-
-// See comment at top of file for a description of the class
-template <typename T>
-class Xger: public Routine {
- public:
-
-  // Constructor
-  Xger(Queue &queue, EventPointer event, const std::string &name = "GER");
-
-  // Templated-precision implementation of the routine
-  StatusCode DoGer(const Layout layout,
-                   const size_t m, const size_t n,
-                   const T alpha,
-                   const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
-                   const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
-                   const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld);
-};
-
-// =================================================================================================
-} // namespace clblast
-
-// CLBLAST_ROUTINES_XGER_H_
-#endif
diff --git a/include/internal/routines/level2/xgerc.h b/include/internal/routines/level2/xgerc.h
deleted file mode 100644
index 6d06ef94..00000000
--- a/include/internal/routines/level2/xgerc.h
+++ /dev/null
@@ -1,46 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-//   Cedric Nugteren <www.cedricnugteren.nl>
-//
-// This file implements the Xgerc routine. The precision is implemented using a template argument.
-//
-// =================================================================================================
-
-#ifndef CLBLAST_ROUTINES_XGERC_H_
-#define CLBLAST_ROUTINES_XGERC_H_
-
-#include "internal/routines/level2/xger.h"
-
-namespace clblast {
-// =================================================================================================
-
-// See comment at top of file for a description of the class
-template <typename T>
-class Xgerc: public Xger<T> {
- public:
-
-  // Uses the regular Xger routine
-  using Xger<T>::DoGer;
-
-  // Constructor
-  Xgerc(Queue &queue, EventPointer event, const std::string &name = "GERC");
-
-  // Templated-precision implementation of the routine
-  StatusCode DoGerc(const Layout layout,
-                    const size_t m, const size_t n,
-                    const T alpha,
-                    const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
-                    const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
-                    const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld);
-};
-
-// =================================================================================================
-} // namespace clblast
-
-// CLBLAST_ROUTINES_XGERC_H_
-#endif
diff --git a/include/internal/routines/level2/xgeru.h b/include/internal/routines/level2/xgeru.h
deleted file mode 100644
index 45ce1cba..00000000
--- a/include/internal/routines/level2/xgeru.h
+++ /dev/null
@@ -1,46 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-//   Cedric Nugteren <www.cedricnugteren.nl>
-//
-// This file implements the Xgeru routine. The precision is implemented using a template argument.
-//
-// =================================================================================================
-
-#ifndef CLBLAST_ROUTINES_XGERU_H_
-#define CLBLAST_ROUTINES_XGERU_H_
-
-#include "internal/routines/level2/xger.h"
-
-namespace clblast {
-// =================================================================================================
-
-// See comment at top of file for a description of the class
-template <typename T>
-class Xgeru: public Xger<T> {
- public:
-
-  // Uses the regular Xger routine
-  using Xger<T>::DoGer;
-
-  // Constructor
-  Xgeru(Queue &queue, EventPointer event, const std::string &name = "GERU");
-
-  // Templated-precision implementation of the routine
-  StatusCode DoGeru(const Layout layout,
-                    const size_t m, const size_t n,
-                    const T alpha,
-                    const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
-                    const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
-                    const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld);
-};
-
-// =================================================================================================
-} // namespace clblast
-
-// CLBLAST_ROUTINES_XGERU_H_
-#endif
diff --git a/include/internal/routines/level2/xhbmv.h b/include/internal/routines/level2/xhbmv.h
deleted file mode 100644
index f0a6212c..00000000
--- a/include/internal/routines/level2/xhbmv.h
+++ /dev/null
@@ -1,49 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-//   Cedric Nugteren <www.cedricnugteren.nl>
-//
-// This file implements the Xhbmv routine. It is based on the generalized mat-vec multiplication
-// routine (Xgemv). The Xhbmv class inherits from the templated class Xgemv, allowing it to call the
-// "MatVec" function directly.
-//
-// =================================================================================================
-
-#ifndef CLBLAST_ROUTINES_XHBMV_H_
-#define CLBLAST_ROUTINES_XHBMV_H_
-
-#include "internal/routines/level2/xgemv.h"
-
-namespace clblast {
-// =================================================================================================
-
-// See comment at top of file for a description of the class
-template <typename T>
-class Xhbmv: public Xgemv<T> {
- public:
-
-  // Uses the generic matrix-vector routine
-  using Xgemv<T>::MatVec;
-
-  // Constructor
-  Xhbmv(Queue &queue, EventPointer event, const std::string &name = "HBMV");
-
-  // Templated-precision implementation of the routine
-  StatusCode DoHbmv(const Layout layout, const Triangle triangle,
-                    const size_t n, const size_t k,
-                    const T alpha,
-                    const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
-                    const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
-                    const T beta,
-                    const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc);
-};
-
-// =================================================================================================
-} // namespace clblast
-
-// CLBLAST_ROUTINES_XHBMV_H_
-#endif
diff --git a/include/internal/routines/level2/xhemv.h b/include/internal/routines/level2/xhemv.h
deleted file mode 100644
index 3daf2457..00000000
--- a/include/internal/routines/level2/xhemv.h
+++ /dev/null
@@ -1,49 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-//   Cedric Nugteren <www.cedricnugteren.nl>
-//
-// This file implements the Xhemv routine. It is based on the generalized mat-vec multiplication
-// routine (Xgemv). The Xhemv class inherits from the templated class Xgemv, allowing it to call the
-// "MatVec" function directly.
-//
-// =================================================================================================
-
-#ifndef CLBLAST_ROUTINES_XHEMV_H_
-#define CLBLAST_ROUTINES_XHEMV_H_
-
-#include "internal/routines/level2/xgemv.h"
-
-namespace clblast {
-// =================================================================================================
-
-// See comment at top of file for a description of the class
-template <typename T>
-class Xhemv: public Xgemv<T> {
- public:
-
-  // Uses the generic matrix-vector routine
-  using Xgemv<T>::MatVec;
-
-  // Constructor
-  Xhemv(Queue &queue, EventPointer event, const std::string &name = "HEMV");
-
-  // Templated-precision implementation of the routine
-  StatusCode DoHemv(const Layout layout, const Triangle triangle,
-                    const size_t n,
-                    const T alpha,
-                    const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
-                    const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
-                    const T beta,
-                    const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc);
-};
-
-// =================================================================================================
-} // namespace clblast
-
-// CLBLAST_ROUTINES_XHEMV_H_
-#endif
diff --git a/include/internal/routines/level2/xher.h b/include/internal/routines/level2/xher.h
deleted file mode 100644
index fca8bb97..00000000
--- a/include/internal/routines/level2/xher.h
+++ /dev/null
@@ -1,46 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-//   Cedric Nugteren <www.cedricnugteren.nl>
-//
-// This file implements the Xher routine. The precision is implemented using a template argument.
-//
-// =================================================================================================
-
-#ifndef CLBLAST_ROUTINES_XHER_H_
-#define CLBLAST_ROUTINES_XHER_H_
-
-#include "internal/routine.h"
-
-namespace clblast {
-// =================================================================================================
-
-// See comment at top of file for a description of the class
-template <typename T, typename U>
-class Xher: public Routine {
- public:
-
-  // Constructor
-  Xher(Queue &queue, EventPointer event, const std::string &name = "HER");
-
-  // Translates alpha of type 'U' into type 'T'
-  T GetAlpha(const U alpha);
-
-  // Templated-precision implementation of the routine
-  StatusCode DoHer(const Layout layout, const Triangle triangle,
-                   const size_t n,
-                   const U alpha,
-                   const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
-                   const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
-                   const bool packed = false);
-};
-
-// =================================================================================================
-} // namespace clblast
-
-// CLBLAST_ROUTINES_XHER_H_
-#endif
diff --git a/include/internal/routines/level2/xher2.h b/include/internal/routines/level2/xher2.h
deleted file mode 100644
index 9a7610f1..00000000
--- a/include/internal/routines/level2/xher2.h
+++ /dev/null
@@ -1,44 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-//   Cedric Nugteren <www.cedricnugteren.nl>
-//
-// This file implements the Xher2 routine. The precision is implemented using a template argument.
-//
-// =================================================================================================
-
-#ifndef CLBLAST_ROUTINES_XHER2_H_
-#define CLBLAST_ROUTINES_XHER2_H_
-
-#include "internal/routine.h"
-
-namespace clblast {
-// =================================================================================================
-
-// See comment at top of file for a description of the class
-template <typename T>
-class Xher2: public Routine {
- public:
-
-  // Constructor
-  Xher2(Queue &queue, EventPointer event, const std::string &name = "HER2");
-
-  // Templated-precision implementation of the routine
-  StatusCode DoHer2(const Layout layout, const Triangle triangle,
-                    const size_t n,
-                    const T alpha,
-                    const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
-                    const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
-                    const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
-                    const bool packed = false);
-};
-
-// =================================================================================================
-} // namespace clblast
-
-// CLBLAST_ROUTINES_XHER2_H_
-#endif
diff --git a/include/internal/routines/level2/xhpmv.h b/include/internal/routines/level2/xhpmv.h
deleted file mode 100644
index a1d5595a..00000000
--- a/include/internal/routines/level2/xhpmv.h
+++ /dev/null
@@ -1,49 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-//   Cedric Nugteren <www.cedricnugteren.nl>
-//
-// This file implements the Xhpmv routine. It is based on the generalized mat-vec multiplication
-// routine (Xgemv). The Xhpmv class inherits from the templated class Xgemv, allowing it to call the
-// "MatVec" function directly.
-//
-// =================================================================================================
-
-#ifndef CLBLAST_ROUTINES_XHPMV_H_
-#define CLBLAST_ROUTINES_XHPMV_H_
-
-#include "internal/routines/level2/xgemv.h"
-
-namespace clblast {
-// =================================================================================================
-
-// See comment at top of file for a description of the class
-template <typename T>
-class Xhpmv: public Xgemv<T> {
- public:
-
-  // Uses the generic matrix-vector routine
-  using Xgemv<T>::MatVec;
-
-  // Constructor
-  Xhpmv(Queue &queue, EventPointer event, const std::string &name = "HPMV");
-
-  // Templated-precision implementation of the routine
-  StatusCode DoHpmv(const Layout layout, const Triangle triangle,
-                    const size_t n,
-                    const T alpha,
-                    const Buffer<T> &ap_buffer, const size_t ap_offset,
-                    const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
-                    const T beta,
-                    const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc);
-};
-
-// =================================================================================================
-} // namespace clblast
-
-// CLBLAST_ROUTINES_XHPMV_H_
-#endif
diff --git a/include/internal/routines/level2/xhpr.h b/include/internal/routines/level2/xhpr.h
deleted file mode 100644
index 6554d74c..00000000
--- a/include/internal/routines/level2/xhpr.h
+++ /dev/null
@@ -1,45 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-//   Cedric Nugteren <www.cedricnugteren.nl>
-//
-// This file implements the Xhpr routine. The precision is implemented using a template argument.
-//
-// =================================================================================================
-
-#ifndef CLBLAST_ROUTINES_XHPR_H_
-#define CLBLAST_ROUTINES_XHPR_H_
-
-#include "internal/routines/level2/xher.h"
-
-namespace clblast {
-// =================================================================================================
-
-// See comment at top of file for a description of the class
-template <typename T, typename U>
-class Xhpr: public Xher<T,U> {
- public:
-
-  // Uses the regular Xher routine
-  using Xher<T,U>::DoHer;
-
-  // Constructor
-  Xhpr(Queue &queue, EventPointer event, const std::string &name = "HPR");
-
-  // Templated-precision implementation of the routine
-  StatusCode DoHpr(const Layout layout, const Triangle triangle,
-                   const size_t n,
-                   const U alpha,
-                   const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
-                   const Buffer<T> &ap_buffer, const size_t ap_offset);
-};
-
-// =================================================================================================
-} // namespace clblast
-
-// CLBLAST_ROUTINES_XHPR_H_
-#endif
diff --git a/include/internal/routines/level2/xhpr2.h b/include/internal/routines/level2/xhpr2.h
deleted file mode 100644
index d95e7b61..00000000
--- a/include/internal/routines/level2/xhpr2.h
+++ /dev/null
@@ -1,46 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-//   Cedric Nugteren <www.cedricnugteren.nl>
-//
-// This file implements the Xhpr2 routine. The precision is implemented using a template argument.
-//
-// =================================================================================================
-
-#ifndef CLBLAST_ROUTINES_XHPR2_H_
-#define CLBLAST_ROUTINES_XHPR2_H_
-
-#include "internal/routines/level2/xher2.h"
-
-namespace clblast {
-// =================================================================================================
-
-// See comment at top of file for a description of the class
-template <typename T>
-class Xhpr2: public Xher2<T> {
- public:
-
-  // Uses the regular Xher2 routine
-  using Xher2<T>::DoHer2;
-
-  // Constructor
-  Xhpr2(Queue &queue, EventPointer event, const std::string &name = "HPR2");
-
-  // Templated-precision implementation of the routine
-  StatusCode DoHpr2(const Layout layout, const Triangle triangle,
-                    const size_t n,
-                    const T alpha,
-                    const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
-                    const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
-                    const Buffer<T> &ap_buffer, const size_t ap_offset);
-};
-
-// =================================================================================================
-} // namespace clblast
-
-// CLBLAST_ROUTINES_XHPR2_H_
-#endif
diff --git a/include/internal/routines/level2/xsbmv.h b/include/internal/routines/level2/xsbmv.h
deleted file mode 100644
index 4328e377..00000000
--- a/include/internal/routines/level2/xsbmv.h
+++ /dev/null
@@ -1,49 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-//   Cedric Nugteren <www.cedricnugteren.nl>
-//
-// This file implements the Xsbmv routine. It is based on the generalized mat-vec multiplication
-// routine (Xgemv). The Xsbmv class inherits from the templated class Xgemv, allowing it to call the
-// "MatVec" function directly.
-//
-// =================================================================================================
-
-#ifndef CLBLAST_ROUTINES_XSBMV_H_
-#define CLBLAST_ROUTINES_XSBMV_H_
-
-#include "internal/routines/level2/xgemv.h"
-
-namespace clblast {
-// =================================================================================================
-
-// See comment at top of file for a description of the class
-template <typename T>
-class Xsbmv: public Xgemv<T> {
- public:
-
-  // Uses the generic matrix-vector routine
-  using Xgemv<T>::MatVec;
-
-  // Constructor
-  Xsbmv(Queue &queue, EventPointer event, const std::string &name = "SBMV");
-
-  // Templated-precision implementation of the routine
-  StatusCode DoSbmv(const Layout layout, const Triangle triangle,
-                    const size_t n, const size_t k,
-                    const T alpha,
-                    const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
-                    const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
-                    const T beta,
-                    const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc);
-};
-
-// =================================================================================================
-} // namespace clblast
-
-// CLBLAST_ROUTINES_XSBMV_H_
-#endif
diff --git a/include/internal/routines/level2/xspmv.h b/include/internal/routines/level2/xspmv.h
deleted file mode 100644
index ca3e28b6..00000000
--- a/include/internal/routines/level2/xspmv.h
+++ /dev/null
@@ -1,49 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-//   Cedric Nugteren <www.cedricnugteren.nl>
-//
-// This file implements the Xspmv routine. It is based on the generalized mat-vec multiplication
-// routine (Xgemv). The Xspmv class inherits from the templated class Xgemv, allowing it to call the
-// "MatVec" function directly.
-//
-// =================================================================================================
-
-#ifndef CLBLAST_ROUTINES_XSPMV_H_
-#define CLBLAST_ROUTINES_XSPMV_H_
-
-#include "internal/routines/level2/xgemv.h"
-
-namespace clblast {
-// =================================================================================================
-
-// See comment at top of file for a description of the class
-template <typename T>
-class Xspmv: public Xgemv<T> {
- public:
-
-  // Uses the generic matrix-vector routine
-  using Xgemv<T>::MatVec;
-
-  // Constructor
-  Xspmv(Queue &queue, EventPointer event, const std::string &name = "SPMV");
-
-  // Templated-precision implementation of the routine
-  StatusCode DoSpmv(const Layout layout, const Triangle triangle,
-                    const size_t n,
-                    const T alpha,
-                    const Buffer<T> &ap_buffer, const size_t ap_offset,
-                    const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
-                    const T beta,
-                    const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc);
-};
-
-// =================================================================================================
-} // namespace clblast
-
-// CLBLAST_ROUTINES_XSPMV_H_
-#endif
diff --git a/include/internal/routines/level2/xspr.h b/include/internal/routines/level2/xspr.h
deleted file mode 100644
index 7e91abc5..00000000
--- a/include/internal/routines/level2/xspr.h
+++ /dev/null
@@ -1,45 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-//   Cedric Nugteren <www.cedricnugteren.nl>
-//
-// This file implements the Xspr routine. The precision is implemented using a template argument.
-//
-// =================================================================================================
-
-#ifndef CLBLAST_ROUTINES_XSPR_H_
-#define CLBLAST_ROUTINES_XSPR_H_
-
-#include "internal/routines/level2/xher.h"
-
-namespace clblast {
-// =================================================================================================
-
-// See comment at top of file for a description of the class
-template <typename T>
-class Xspr: public Xher<T,T> {
- public:
-
-  // Uses the regular Xher routine
-  using Xher<T,T>::DoHer;
-
-  // Constructor
-  Xspr(Queue &queue, EventPointer event, const std::string &name = "SPR");
-
-  // Templated-precision implementation of the routine
-  StatusCode DoSpr(const Layout layout, const Triangle triangle,
-                   const size_t n,
-                   const T alpha,
-                   const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
-                   const Buffer<T> &ap_buffer, const size_t ap_offset);
-};
-
-// =================================================================================================
-} // namespace clblast
-
-// CLBLAST_ROUTINES_XSPR_H_
-#endif
diff --git a/include/internal/routines/level2/xspr2.h b/include/internal/routines/level2/xspr2.h
deleted file mode 100644
index a34be8e8..00000000
--- a/include/internal/routines/level2/xspr2.h
+++ /dev/null
@@ -1,46 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-//   Cedric Nugteren <www.cedricnugteren.nl>
-//
-// This file implements the Xspr2 routine. The precision is implemented using a template argument.
-//
-// =================================================================================================
-
-#ifndef CLBLAST_ROUTINES_XSPR2_H_
-#define CLBLAST_ROUTINES_XSPR2_H_
-
-#include "internal/routines/level2/xher2.h"
-
-namespace clblast {
-// =================================================================================================
-
-// See comment at top of file for a description of the class
-template <typename T>
-class Xspr2: public Xher2<T> {
- public:
-
-  // Uses the regular Xher2 routine
-  using Xher2<T>::DoHer2;
-
-  // Constructor
-  Xspr2(Queue &queue, EventPointer event, const std::string &name = "SPR2");
-
-  // Templated-precision implementation of the routine
-  StatusCode DoSpr2(const Layout layout, const Triangle triangle,
-                    const size_t n,
-                    const T alpha,
-                    const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
-                    const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
-                    const Buffer<T> &ap_buffer, const size_t ap_offset);
-};
-
-// =================================================================================================
-} // namespace clblast
-
-// CLBLAST_ROUTINES_XSPR2_H_
-#endif
diff --git a/include/internal/routines/level2/xsymv.h b/include/internal/routines/level2/xsymv.h
deleted file mode 100644
index 98a0ce88..00000000
--- a/include/internal/routines/level2/xsymv.h
+++ /dev/null
@@ -1,49 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-//   Cedric Nugteren <www.cedricnugteren.nl>
-//
-// This file implements the Xsymv routine. It is based on the generalized mat-vec multiplication
-// routine (Xgemv). The Xsymv class inherits from the templated class Xgemv, allowing it to call the
-// "MatVec" function directly.
-//
-// =================================================================================================
-
-#ifndef CLBLAST_ROUTINES_XSYMV_H_
-#define CLBLAST_ROUTINES_XSYMV_H_
-
-#include "internal/routines/level2/xgemv.h"
-
-namespace clblast {
-// =================================================================================================
-
-// See comment at top of file for a description of the class
-template <typename T>
-class Xsymv: public Xgemv<T> {
- public:
-
-  // Uses the generic matrix-vector routine
-  using Xgemv<T>::MatVec;
-
-  // Constructor
-  Xsymv(Queue &queue, EventPointer event, const std::string &name = "SYMV");
-
-  // Templated-precision implementation of the routine
-  StatusCode DoSymv(const Layout layout, const Triangle triangle,
-                    const size_t n,
-                    const T alpha,
-                    const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
-                    const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
-                    const T beta,
-                    const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc);
-};
-
-// =================================================================================================
-} // namespace clblast
-
-// CLBLAST_ROUTINES_XSYMV_H_
-#endif
diff --git a/include/internal/routines/level2/xsyr.h b/include/internal/routines/level2/xsyr.h
deleted file mode 100644
index f88498ae..00000000
--- a/include/internal/routines/level2/xsyr.h
+++ /dev/null
@@ -1,45 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-//   Cedric Nugteren <www.cedricnugteren.nl>
-//
-// This file implements the Xsyr routine. The precision is implemented using a template argument.
-//
-// =================================================================================================
-
-#ifndef CLBLAST_ROUTINES_XSYR_H_
-#define CLBLAST_ROUTINES_XSYR_H_
-
-#include "internal/routines/level2/xher.h"
-
-namespace clblast {
-// =================================================================================================
-
-// See comment at top of file for a description of the class
-template <typename T>
-class Xsyr: public Xher<T,T> {
- public:
-
-  // Uses the regular Xher routine
-  using Xher<T,T>::DoHer;
-
-  // Constructor
-  Xsyr(Queue &queue, EventPointer event, const std::string &name = "SYR");
-
-  // Templated-precision implementation of the routine
-  StatusCode DoSyr(const Layout layout, const Triangle triangle,
-                   const size_t n,
-                   const T alpha,
-                   const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
-                   const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld);
-};
-
-// =================================================================================================
-} // namespace clblast
-
-// CLBLAST_ROUTINES_XSYR_H_
-#endif
diff --git a/include/internal/routines/level2/xsyr2.h b/include/internal/routines/level2/xsyr2.h
deleted file mode 100644
index d2d3143a..00000000
--- a/include/internal/routines/level2/xsyr2.h
+++ /dev/null
@@ -1,46 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-//   Cedric Nugteren <www.cedricnugteren.nl>
-//
-// This file implements the Xsyr2 routine. The precision is implemented using a template argument.
-//
-// =================================================================================================
-
-#ifndef CLBLAST_ROUTINES_XSYR2_H_
-#define CLBLAST_ROUTINES_XSYR2_H_
-
-#include "internal/routines/level2/xher2.h"
-
-namespace clblast {
-// =================================================================================================
-
-// See comment at top of file for a description of the class
-template <typename T>
-class Xsyr2: public Xher2<T> {
- public:
-
-  // Uses the regular Xher2 routine
-  using Xher2<T>::DoHer2;
-
-  // Constructor
-  Xsyr2(Queue &queue, EventPointer event, const std::string &name = "SYR2");
-
-  // Templated-precision implementation of the routine
-  StatusCode DoSyr2(const Layout layout, const Triangle triangle,
-                    const size_t n,
-                    const T alpha,
-                    const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
-                    const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
-                    const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld);
-};
-
-// =================================================================================================
-} // namespace clblast
-
-// CLBLAST_ROUTINES_XSYR2_H_
-#endif
diff --git a/include/internal/routines/level2/xtbmv.h b/include/internal/routines/level2/xtbmv.h
deleted file mode 100644
index 493a9853..00000000
--- a/include/internal/routines/level2/xtbmv.h
+++ /dev/null
@@ -1,49 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-//   Cedric Nugteren <www.cedricnugteren.nl>
-//
-// This file implements the Xtbmv routine. It is based on the generalized mat-vec multiplication
-// routine (Xgemv). The Xtbmv class inherits from the templated class Xgemv, allowing it to call the
-// "MatVec" function directly.
-//
-// =================================================================================================
-
-#ifndef CLBLAST_ROUTINES_XTBMV_H_
-#define CLBLAST_ROUTINES_XTBMV_H_
-
-#include "internal/routines/level2/xgemv.h"
-
-namespace clblast {
-// =================================================================================================
-
-// See comment at top of file for a description of the class
-template <typename T>
-class Xtbmv: public Xgemv<T> {
- public:
-
-  // Uses the generic matrix-vector routine
-  using Xgemv<T>::queue_;
-  using Xgemv<T>::context_;
-  using Xgemv<T>::MatVec;
-
-  // Constructor
-  Xtbmv(Queue &queue, EventPointer event, const std::string &name = "TBMV");
-
-  // Templated-precision implementation of the routine
-  StatusCode DoTbmv(const Layout layout, const Triangle triangle,
-                    const Transpose a_transpose, const Diagonal diagonal,
-                    const size_t n, const size_t k,
-                    const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
-                    const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc);
-};
-
-// =================================================================================================
-} // namespace clblast
-
-// CLBLAST_ROUTINES_XTBMV_H_
-#endif
diff --git a/include/internal/routines/level2/xtpmv.h b/include/internal/routines/level2/xtpmv.h
deleted file mode 100644
index ce5cae6f..00000000
--- a/include/internal/routines/level2/xtpmv.h
+++ /dev/null
@@ -1,49 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-//   Cedric Nugteren <www.cedricnugteren.nl>
-//
-// This file implements the Xtpmv routine. It is based on the generalized mat-vec multiplication
-// routine (Xgemv). The Xtpmv class inherits from the templated class Xgemv, allowing it to call the
-// "MatVec" function directly.
-//
-// =================================================================================================
-
-#ifndef CLBLAST_ROUTINES_XTPMV_H_
-#define CLBLAST_ROUTINES_XTPMV_H_
-
-#include "internal/routines/level2/xgemv.h"
-
-namespace clblast {
-// =================================================================================================
-
-// See comment at top of file for a description of the class
-template <typename T>
-class Xtpmv: public Xgemv<T> {
- public:
-
-  // Uses the generic matrix-vector routine
-  using Xgemv<T>::queue_;
-  using Xgemv<T>::context_;
-  using Xgemv<T>::MatVec;
-
-  // Constructor
-  Xtpmv(Queue &queue, EventPointer event, const std::string &name = "TPMV");
-
-  // Templated-precision implementation of the routine
-  StatusCode DoTpmv(const Layout layout, const Triangle triangle,
-                    const Transpose a_transpose, const Diagonal diagonal,
-                    const size_t n,
-                    const Buffer<T> &ap_buffer, const size_t ap_offset,
-                    const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc);
-};
-
-// =================================================================================================
-} // namespace clblast
-
-// CLBLAST_ROUTINES_XTPMV_H_
-#endif
diff --git a/include/internal/routines/level2/xtrmv.h b/include/internal/routines/level2/xtrmv.h
deleted file mode 100644
index 4407bad7..00000000
--- a/include/internal/routines/level2/xtrmv.h
+++ /dev/null
@@ -1,49 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-//   Cedric Nugteren <www.cedricnugteren.nl>
-//
-// This file implements the Xtrmv routine. It is based on the generalized mat-vec multiplication
-// routine (Xgemv). The Xtrmv class inherits from the templated class Xgemv, allowing it to call the
-// "MatVec" function directly.
-//
-// =================================================================================================
-
-#ifndef CLBLAST_ROUTINES_XTRMV_H_
-#define CLBLAST_ROUTINES_XTRMV_H_
-
-#include "internal/routines/level2/xgemv.h"
-
-namespace clblast {
-// =================================================================================================
-
-// See comment at top of file for a description of the class
-template <typename T>
-class Xtrmv: public Xgemv<T> {
- public:
-
-  // Uses the generic matrix-vector routine
-  using Xgemv<T>::queue_;
-  using Xgemv<T>::context_;
-  using Xgemv<T>::MatVec;
-
-  // Constructor
-  Xtrmv(Queue &queue, EventPointer event, const std::string &name = "TRMV");
-
-  // Templated-precision implementation of the routine
-  StatusCode DoTrmv(const Layout layout, const Triangle triangle,
-                    const Transpose a_transpose, const Diagonal diagonal,
-                    const size_t n,
-                    const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
-                    const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc);
-};
-
-// =================================================================================================
-} // namespace clblast
-
-// CLBLAST_ROUTINES_XTRMV_H_
-#endif
diff --git a/include/internal/routines/level3/xgemm.h b/include/internal/routines/level3/xgemm.h
deleted file mode 100644
index c0541eef..00000000
--- a/include/internal/routines/level3/xgemm.h
+++ /dev/null
@@ -1,48 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-//   Cedric Nugteren <www.cedricnugteren.nl>
-//
-// This file implements the Xgemm routine. The precision is implemented using a template argument.
-//
-// =================================================================================================
-
-#ifndef CLBLAST_ROUTINES_XGEMM_H_
-#define CLBLAST_ROUTINES_XGEMM_H_
-
-#include "internal/routine.h"
-
-namespace clblast {
-// =================================================================================================
-
-// See comment at top of file for a description of the class
-template <typename T>
-class Xgemm: public Routine {
- public:
-
-  // Constructor
-  Xgemm(Queue &queue, EventPointer event, const std::string &name = "GEMM");
-
-  // Templated-precision implementation of the routine
-  StatusCode DoGemm(const Layout layout, const Transpose a_transpose, const Transpose b_transpose,
-                    const size_t m, const size_t n, const size_t k,
-                    const T alpha,
-                    const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
-                    const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld,
-                    const T beta,
-                    const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld);
-
- protected:
-  // Static variable to get the precision
-  const static Precision precision_;
-};
-
-// =================================================================================================
-} // namespace clblast
-
-// CLBLAST_ROUTINES_XGEMM_H_
-#endif
diff --git a/include/internal/routines/level3/xhemm.h b/include/internal/routines/level3/xhemm.h
deleted file mode 100644
index e0f35669..00000000
--- a/include/internal/routines/level3/xhemm.h
+++ /dev/null
@@ -1,54 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-//   Cedric Nugteren <www.cedricnugteren.nl>
-//
-// This file implements the Xhemm routine. It is based on the generalized matrix multiplication
-// routine (Xgemm). The implementation is very similar to the Xsymm routine.
-//
-// =================================================================================================
-
-#ifndef CLBLAST_ROUTINES_XHEMM_H_
-#define CLBLAST_ROUTINES_XHEMM_H_
-
-#include "internal/routines/level3/xgemm.h"
-
-namespace clblast {
-// =================================================================================================
-
-// See comment at top of file for a description of the class
-template <typename T>
-class Xhemm: public Xgemm<T> {
- public:
-
-  // Uses methods and variables of the regular Xgemm routine
-  using Xgemm<T>::precision_;
-  using Xgemm<T>::routine_name_;
-  using Xgemm<T>::queue_;
-  using Xgemm<T>::context_;
-  using Xgemm<T>::device_;
-  using Xgemm<T>::db_;
-  using Xgemm<T>::DoGemm;
-
-  // Constructor
-  Xhemm(Queue &queue, EventPointer event, const std::string &name = "HEMM");
-
-  // Templated-precision implementation of the routine
-  StatusCode DoHemm(const Layout layout, const Side side, const Triangle triangle,
-                    const size_t m, const size_t n,
-                    const T alpha,
-                    const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
-                    const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld,
-                    const T beta,
-                    const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld);
-};
-
-// =================================================================================================
-} // namespace clblast
-
-// CLBLAST_ROUTINES_XHEMM_H_
-#endif
diff --git a/include/internal/routines/level3/xher2k.h b/include/internal/routines/level3/xher2k.h
deleted file mode 100644
index b7764e18..00000000
--- a/include/internal/routines/level3/xher2k.h
+++ /dev/null
@@ -1,46 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-//   Cedric Nugteren <www.cedricnugteren.nl>
-//
-// This file implements the Xher2k routine. The precision is implemented using the template argument
-// 'T', whereas the alpha/beta arguments are of type 'U'. The implementation is very similar to the
-// Xsyr2k routine.
-//
-// =================================================================================================
-
-#ifndef CLBLAST_ROUTINES_XHER2K_H_
-#define CLBLAST_ROUTINES_XHER2K_H_
-
-#include "internal/routine.h"
-
-namespace clblast {
-// =================================================================================================
-
-// See comment at top of file for a description of the class
-template <typename T, typename U>
-class Xher2k: public Routine {
- public:
-
-  // Constructor
-  Xher2k(Queue &queue, EventPointer event, const std::string &name = "HER2K");
-
-  // Templated-precision implementation of the routine
-  StatusCode DoHer2k(const Layout layout, const Triangle triangle, const Transpose ab_transpose,
-                     const size_t n, const size_t k,
-                     const T alpha,
-                     const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
-                     const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld,
-                     const U beta,
-                     const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld);
-};
-
-// =================================================================================================
-} // namespace clblast
-
-// CLBLAST_ROUTINES_XHER2K_H_
-#endif
diff --git a/include/internal/routines/level3/xherk.h b/include/internal/routines/level3/xherk.h
deleted file mode 100644
index abcf4c1a..00000000
--- a/include/internal/routines/level3/xherk.h
+++ /dev/null
@@ -1,45 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-//   Cedric Nugteren <www.cedricnugteren.nl>
-//
-// This file implements the Xherk routine. The precision is implemented using the template argument
-// 'T', whereas the alpha/beta arguments are of type 'U'. The implementation is very similar to the
-// Xsyrk routine.
-//
-// =================================================================================================
-
-#ifndef CLBLAST_ROUTINES_XHERK_H_
-#define CLBLAST_ROUTINES_XHERK_H_
-
-#include "internal/routine.h"
-
-namespace clblast {
-// =================================================================================================
-
-// See comment at top of file for a description of the class
-template <typename T, typename U>
-class Xherk: public Routine {
- public:
-
-  // Constructor
-  Xherk(Queue &queue, EventPointer event, const std::string &name = "HERK");
-
-  // Templated-precision implementation of the routine
-  StatusCode DoHerk(const Layout layout, const Triangle triangle, const Transpose a_transpose,
-                    const size_t n, const size_t k,
-                    const U alpha,
-                    const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
-                    const U beta,
-                    const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld);
-};
-
-// =================================================================================================
-} // namespace clblast
-
-// CLBLAST_ROUTINES_XHERK_H_
-#endif
diff --git a/include/internal/routines/level3/xsymm.h b/include/internal/routines/level3/xsymm.h
deleted file mode 100644
index 889abfb7..00000000
--- a/include/internal/routines/level3/xsymm.h
+++ /dev/null
@@ -1,56 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-//   Cedric Nugteren <www.cedricnugteren.nl>
-//
-// This file implements the Xsymm routine. It is based on the generalized matrix multiplication
-// routine (Xgemm). The Xsymm class inherits from the templated class Xgemm, allowing it to call the
-// "DoGemm" function directly. The "DoSymm" function first preprocesses the symmetric matrix by
-// transforming it into a general matrix, and then calls the regular GEMM code.
-//
-// =================================================================================================
-
-#ifndef CLBLAST_ROUTINES_XSYMM_H_
-#define CLBLAST_ROUTINES_XSYMM_H_
-
-#include "internal/routines/level3/xgemm.h"
-
-namespace clblast {
-// =================================================================================================
-
-// See comment at top of file for a description of the class
-template <typename T>
-class Xsymm: public Xgemm<T> {
- public:
-
-  // Uses methods and variables of the regular Xgemm routine
-  using Xgemm<T>::precision_;
-  using Xgemm<T>::routine_name_;
-  using Xgemm<T>::queue_;
-  using Xgemm<T>::context_;
-  using Xgemm<T>::device_;
-  using Xgemm<T>::db_;
-  using Xgemm<T>::DoGemm;
-
-  // Constructor
-  Xsymm(Queue &queue, EventPointer event, const std::string &name = "SYMM");
-
-  // Templated-precision implementation of the routine
-  StatusCode DoSymm(const Layout layout, const Side side, const Triangle triangle,
-                    const size_t m, const size_t n,
-                    const T alpha,
-                    const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
-                    const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld,
-                    const T beta,
-                    const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld);
-};
-
-// =================================================================================================
-} // namespace clblast
-
-// CLBLAST_ROUTINES_XSYMM_H_
-#endif
diff --git a/include/internal/routines/level3/xsyr2k.h b/include/internal/routines/level3/xsyr2k.h
deleted file mode 100644
index f75c91e5..00000000
--- a/include/internal/routines/level3/xsyr2k.h
+++ /dev/null
@@ -1,46 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-//   Cedric Nugteren <www.cedricnugteren.nl>
-//
-// This file implements the Xsyr2k routine. The precision is implemented using a template argument.
-// The implementation is very similar to Xsyrk (see header for details), except for the fact that
-// the main XgemmUpper/XgemmLower kernel is called twice: C = AB^T + C and C = BA^T + C.
-//
-// =================================================================================================
-
-#ifndef CLBLAST_ROUTINES_XSYR2K_H_
-#define CLBLAST_ROUTINES_XSYR2K_H_
-
-#include "internal/routine.h"
-
-namespace clblast {
-// =================================================================================================
-
-// See comment at top of file for a description of the class
-template <typename T>
-class Xsyr2k: public Routine {
- public:
-
-  // Constructor
-  Xsyr2k(Queue &queue, EventPointer event, const std::string &name = "SYR2K");
-
-  // Templated-precision implementation of the routine
-  StatusCode DoSyr2k(const Layout layout, const Triangle triangle, const Transpose ab_transpose,
-                     const size_t n, const size_t k,
-                     const T alpha,
-                     const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
-                     const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld,
-                     const T beta,
-                     const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld);
-};
-
-// =================================================================================================
-} // namespace clblast
-
-// CLBLAST_ROUTINES_XSYR2K_H_
-#endif
diff --git a/include/internal/routines/level3/xsyrk.h b/include/internal/routines/level3/xsyrk.h
deleted file mode 100644
index 0710fa74..00000000
--- a/include/internal/routines/level3/xsyrk.h
+++ /dev/null
@@ -1,47 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-//   Cedric Nugteren <www.cedricnugteren.nl>
-//
-// This file implements the Xsyrk routine. The precision is implemented using a template argument.
-// The implementation is based on the regular Xgemm routine and kernel, but with two main changes:
-// 1) The final unpad(transpose) kernel updates only the upper/lower triangular part.
-// 2) The main Xgemm kernel masks workgroups not contributing to useful data. This is only for
-//    performance reasons, as the actual masking is done later (see the first point).
-//
-// =================================================================================================
-
-#ifndef CLBLAST_ROUTINES_XSYRK_H_
-#define CLBLAST_ROUTINES_XSYRK_H_
-
-#include "internal/routine.h"
-
-namespace clblast {
-// =================================================================================================
-
-// See comment at top of file for a description of the class
-template <typename T>
-class Xsyrk: public Routine {
- public:
-
-  // Constructor
-  Xsyrk(Queue &queue, EventPointer event, const std::string &name = "SYRK");
-
-  // Templated-precision implementation of the routine
-  StatusCode DoSyrk(const Layout layout, const Triangle triangle, const Transpose a_transpose,
-                    const size_t n, const size_t k,
-                    const T alpha,
-                    const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
-                    const T beta,
-                    const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld);
-};
-
-// =================================================================================================
-} // namespace clblast
-
-// CLBLAST_ROUTINES_XSYRK_H_
-#endif
diff --git a/include/internal/routines/level3/xtrmm.h b/include/internal/routines/level3/xtrmm.h
deleted file mode 100644
index e18ad17a..00000000
--- a/include/internal/routines/level3/xtrmm.h
+++ /dev/null
@@ -1,54 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-//   Cedric Nugteren <www.cedricnugteren.nl>
-//
-// This file implements the Xtrmm routine. The implementation is based on first transforming the
-// upper/lower unit/non-unit triangular matrix into a regular matrix and then calling the GEMM
-// routine. Therefore, this class inherits from the Xgemm class.
-//
-// =================================================================================================
-
-#ifndef CLBLAST_ROUTINES_XTRMM_H_
-#define CLBLAST_ROUTINES_XTRMM_H_
-
-#include "internal/routines/level3/xgemm.h"
-
-namespace clblast {
-// =================================================================================================
-
-// See comment at top of file for a description of the class
-template <typename T>
-class Xtrmm: public Xgemm<T> {
- public:
-
-  // Uses methods and variables of the regular Xgemm routine
-  using Xgemm<T>::precision_;
-  using Xgemm<T>::routine_name_;
-  using Xgemm<T>::queue_;
-  using Xgemm<T>::context_;
-  using Xgemm<T>::device_;
-  using Xgemm<T>::db_;
-  using Xgemm<T>::DoGemm;
-
-  // Constructor
-  Xtrmm(Queue &queue, EventPointer event, const std::string &name = "TRMM");
-
-  // Templated-precision implementation of the routine
-  StatusCode DoTrmm(const Layout layout, const Side side, const Triangle triangle,
-                    const Transpose a_transpose, const Diagonal diagonal,
-                    const size_t m, const size_t n,
-                    const T alpha,
-                    const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
-                    const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld);
-};
-
-// =================================================================================================
-} // namespace clblast
-
-// CLBLAST_ROUTINES_XTRMM_H_
-#endif
diff --git a/include/internal/routines/levelx/xomatcopy.h b/include/internal/routines/levelx/xomatcopy.h
deleted file mode 100644
index d2acb50d..00000000
--- a/include/internal/routines/levelx/xomatcopy.h
+++ /dev/null
@@ -1,41 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-//   Cedric Nugteren <www.cedricnugteren.nl>
-//
-// This file implements the Xomatcopy routine. The precision is implemented using a template argument.
-//
-// =================================================================================================
-
-#ifndef CLBLAST_ROUTINES_XOMATCOPY_H_
-#define CLBLAST_ROUTINES_XOMATCOPY_H_
-
-#include "internal/routine.h"
-
-namespace clblast {
-// =================================================================================================
-
-// See comment at top of file for a description of the class
-template <typename T>
-class Xomatcopy: public Routine {
- public:
-
-  // Constructor
-  Xomatcopy(Queue &queue, EventPointer event, const std::string &name = "OMATCOPY");
-
-  // Templated-precision implementation of the routine
-  StatusCode DoOmatcopy(const Layout layout, const Transpose a_transpose,
-                        const size_t m, const size_t n, const T alpha,
-                        const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
-                        const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld);
-};
-
-// =================================================================================================
-} // namespace clblast
-
-// CLBLAST_ROUTINES_XOMATCOPY_H_
-#endif
diff --git a/include/internal/tuning.h b/include/internal/tuning.h
deleted file mode 100644
index a44f79d6..00000000
--- a/include/internal/tuning.h
+++ /dev/null
@@ -1,161 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-//   Cedric Nugteren <www.cedricnugteren.nl>
-//
-// This file implements the interface to the CLTune auto-tuner. This is only used for the optional
-// and stand-alone tuner binaries and not part of the core of CLBlast.
-//
-// =================================================================================================
-
-#ifndef CLBLAST_TUNING_H_
-#define CLBLAST_TUNING_H_
-
-#include <vector>
-#include <string>
-
-#include <cltune.h>
-
-#include "internal/utilities.h"
-
-namespace clblast {
-// =================================================================================================
-
-// Function to get command-line argument, set-up the input buffers, configure the tuner, and collect
-// the results. Used for all types of kernel families. Note that this is a header-only function so
-// that it is automatically compiled for the various kernels (given as the 'C' template argument).
-template <typename C, typename T>
-void Tuner(int argc, char* argv[]) {
-
-  // Sets the parameters and platform/device for which to tune (command-line options)
-  auto help = std::string{"* Options given/available:\n"};
-  auto args = Arguments<T>{};
-  args.platform_id = GetArgument(argc, argv, help, kArgPlatform, size_t{0});
-  args.device_id   = GetArgument(argc, argv, help, kArgDevice, size_t{0});
-  args.precision   = GetArgument(argc, argv, help, kArgPrecision, Precision::kSingle);
-  for (auto &o: C::GetOptions()) {
-    if (o == kArgM)        { args.m        = GetArgument(argc, argv, help, kArgM, C::DefaultM()); }
-    if (o == kArgN)        { args.n        = GetArgument(argc, argv, help, kArgN, C::DefaultN()); }
-    if (o == kArgK)        { args.k        = GetArgument(argc, argv, help, kArgK, C::DefaultK()); }
-    if (o == kArgAlpha)    { args.alpha    = GetArgument(argc, argv, help, kArgAlpha, GetScalar<T>()); }
-    if (o == kArgBeta)     { args.beta     = GetArgument(argc, argv, help, kArgBeta, GetScalar<T>()); }
-    if (o == kArgFraction) { args.fraction = GetArgument(argc, argv, help, kArgFraction, C::DefaultFraction()); }
-  }
-  fprintf(stdout, "%s\n", help.c_str());
-
-  // Tests validity of the given arguments
-  C::TestValidArguments(args);
-
-  // Tests for validity of the precision and retrieves properties
-  auto isAMD = false;
-  auto isARM = false;
-  auto isGPU = false;
-  {
-    const auto platform = Platform(args.platform_id);
-    const auto device = Device(platform, args.device_id);
-    if (!PrecisionSupported<T>(device)) {
-      printf("* Unsupported precision, skipping this tuning run\n\n");
-      return;
-    }
-    isAMD = device.IsAMD();
-    isARM = device.IsARM();
-    isGPU = device.IsGPU();
-  }
-
-  // Creates input buffers with random data
-  auto x_vec = std::vector<T>(C::GetSizeX(args));
-  auto y_vec = std::vector<T>(C::GetSizeY(args));
-  auto a_mat = std::vector<T>(C::GetSizeA(args));
-  auto b_mat = std::vector<T>(C::GetSizeB(args));
-  auto c_mat = std::vector<T>(C::GetSizeC(args));
-  auto temp = std::vector<T>(C::GetSizeTemp(args));
-  PopulateVector(x_vec);
-  PopulateVector(y_vec);
-  PopulateVector(a_mat);
-  PopulateVector(b_mat);
-  PopulateVector(c_mat);
-  PopulateVector(temp);
-
-  // Initializes the tuner for the chosen device
-  cltune::Tuner tuner(args.platform_id, args.device_id);
-
-  // Use full-search to explore all parameter combinations or random-search to search only a part of
-  // the parameter values. The fraction is set as a command-line argument.
-  if (args.fraction == 1.0 || args.fraction == 0.0) {
-    tuner.UseFullSearch();
-  }
-  else {
-    tuner.UseRandomSearch(1.0/args.fraction);
-  }
-
-  // Set extra settings for specific defines. This mimics src/routine.cc.
-  auto defines = std::string{""};
-  if (isAMD && isGPU) {
-    defines += "#define USE_CL_MAD 1\n";
-    defines += "#define USE_STAGGERED_INDICES 1\n";
-  }
-  if (isARM && isGPU) {
-    defines += "#define GLOBAL_MEM_FENCE 1\n";
-  }
-
-  // Loads the kernel sources and defines the kernel to tune
-  auto sources = defines + C::GetSources();
-  auto id = tuner.AddKernelFromString(sources, C::KernelName(), C::GlobalSize(args), C::LocalSize());
-  tuner.SetReferenceFromString(sources, C::KernelName(), C::GlobalSizeRef(args), C::LocalSizeRef());
-
-  // Sets the tunable parameters and their possible values
-  C::SetParameters(tuner, id);
-  C::SetConstraints(tuner, id);
-  C::SetLocalMemorySize(tuner, id, args);
-
-  // Tests for a specific precision
-  tuner.AddParameter(id, "PRECISION", {static_cast<size_t>(args.precision)});
-  tuner.AddParameterReference("PRECISION", static_cast<size_t>(args.precision));
-
-  // Modifies the thread-sizes (both global and local) based on the parameters
-  for (auto &parameters: C::MulLocal()) { tuner.MulLocalSize(id, parameters); }
-  for (auto &parameters: C::DivLocal()) { tuner.DivLocalSize(id, parameters); }
-  for (auto &parameters: C::MulGlobal()) { tuner.MulGlobalSize(id, parameters); }
-  for (auto &parameters: C::DivGlobal()) { tuner.DivGlobalSize(id, parameters); }
-
-  // Sets the function's arguments
-  C::SetArguments(tuner, args, x_vec, y_vec, a_mat, b_mat, c_mat, temp);
-
-  // Starts the tuning process
-  tuner.Tune();
-
-  // Prints the results to screen
-  auto time_ms = tuner.PrintToScreen();
-  tuner.PrintFormatted();
-
-  // Also prints the performance of the best-case in terms of GB/s or GFLOPS
-  if (time_ms != 0.0) {
-    printf("[ -------> ] %.1lf ms", time_ms);
-    printf(" or %.1lf %s\n", C::GetMetric(args)/(time_ms*1.0e6), C::PerformanceUnit().c_str());
-  }
-
-  // Outputs the results as JSON to disk, including some meta-data
-  auto precision_string = std::to_string(static_cast<size_t>(args.precision));
-  auto metadata = std::vector<std::pair<std::string,std::string>>{
-    {"kernel_family", C::KernelFamily()},
-    {"precision", precision_string}
-  };
-  for (auto &o: C::GetOptions()) {
-    if (o == kArgM)     { metadata.push_back({"arg_m", std::to_string(args.m)}); }
-    if (o == kArgN)     { metadata.push_back({"arg_n", std::to_string(args.n)}); }
-    if (o == kArgK)     { metadata.push_back({"arg_k", std::to_string(args.k)}); }
-    if (o == kArgAlpha) { metadata.push_back({"arg_alpha", ToString(args.alpha)}); }
-    if (o == kArgBeta)  { metadata.push_back({"arg_beta", ToString(args.beta)}); }
-  }
-  tuner.PrintJSON("clblast_"+C::KernelFamily()+"_"+precision_string+".json", metadata);
-}
-
-// =================================================================================================
-} // namespace clblast
-
-// CLBLAST_TUNING_H_
-#endif
diff --git a/include/internal/utilities.h b/include/internal/utilities.h
deleted file mode 100644
index 7092bcdd..00000000
--- a/include/internal/utilities.h
+++ /dev/null
@@ -1,257 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-//   Cedric Nugteren <www.cedricnugteren.nl>
-//
-// This file provides declarations for the common (test) utility functions such as a command-line
-// argument parser. On top of this, it serves as the 'common' header, including the C++ OpenCL
-// wrapper. These utilities are not only used for CLBlast, but also included as part of the tuners,
-// the performance client and the correctness testers.
-//
-// =================================================================================================
-
-#ifndef CLBLAST_UTILITIES_H_
-#define CLBLAST_UTILITIES_H_
-
-#include <string>
-#include <functional>
-#include <complex>
-
-#include "clblast.h"
-#include "clblast_half.h"
-#include "internal/clpp11.h"
-
-namespace clblast {
-// =================================================================================================
-
-// Shorthands for complex data-types
-using float2 = std::complex<float>;
-using double2 = std::complex<double>;
-
-// Khronos OpenCL extensions
-const std::string kKhronosHalfPrecision = "cl_khr_fp16";
-const std::string kKhronosDoublePrecision = "cl_khr_fp64";
-
-// Caught an unknown error
-constexpr auto kUnknownError = -999;
-
-// =================================================================================================
-
-// The routine-specific arguments in string form
-constexpr auto kArgM = "m";
-constexpr auto kArgN = "n";
-constexpr auto kArgK = "k";
-constexpr auto kArgKL = "kl";
-constexpr auto kArgKU = "ku";
-constexpr auto kArgLayout = "layout";
-constexpr auto kArgATransp = "transA";
-constexpr auto kArgBTransp = "transB";
-constexpr auto kArgSide = "side";
-constexpr auto kArgTriangle = "triangle";
-constexpr auto kArgDiagonal = "diagonal";
-constexpr auto kArgXInc = "incx";
-constexpr auto kArgYInc = "incy";
-constexpr auto kArgXOffset = "offx";
-constexpr auto kArgYOffset = "offy";
-constexpr auto kArgALeadDim = "lda";
-constexpr auto kArgBLeadDim = "ldb";
-constexpr auto kArgCLeadDim = "ldc";
-constexpr auto kArgAOffset = "offa";
-constexpr auto kArgBOffset = "offb";
-constexpr auto kArgCOffset = "offc";
-constexpr auto kArgAPOffset = "offap";
-constexpr auto kArgDotOffset = "offdot";
-constexpr auto kArgNrm2Offset = "offnrm2";
-constexpr auto kArgAsumOffset = "offasum";
-constexpr auto kArgImaxOffset = "offimax";
-constexpr auto kArgAlpha = "alpha";
-constexpr auto kArgBeta = "beta";
-
-// The tuner-specific arguments in string form
-constexpr auto kArgFraction = "fraction";
-
-// The client-specific arguments in string form
-constexpr auto kArgCompareclblas = "clblas";
-constexpr auto kArgComparecblas = "cblas";
-constexpr auto kArgStepSize = "step";
-constexpr auto kArgNumSteps = "num_steps";
-constexpr auto kArgNumRuns = "runs";
-
-// The test-specific arguments in string form
-constexpr auto kArgFullTest = "full_test";
-constexpr auto kArgVerbose = "verbose";
-
-// The common arguments in string form
-constexpr auto kArgPlatform = "platform";
-constexpr auto kArgDevice = "device";
-constexpr auto kArgPrecision = "precision";
-constexpr auto kArgHelp = "h";
-constexpr auto kArgQuiet = "q";
-constexpr auto kArgNoAbbreviations = "no_abbrv";
-
-// =================================================================================================
-
-// Returns a scalar with a default value
-template <typename T>
-T GetScalar();
-
-// Returns a scalar of value 1
-template <typename T>
-T ConstantOne();
-
-// =================================================================================================
-
-// Structure containing all possible arguments for test clients, including their default values
-template <typename T>
-struct Arguments {
-  // Routine-specific arguments
-  size_t m = 1;
-  size_t n = 1;
-  size_t k = 1;
-  size_t ku = 1;
-  size_t kl = 1;
-  Layout layout = Layout::kRowMajor;
-  Transpose a_transpose = Transpose::kNo;
-  Transpose b_transpose = Transpose::kNo;
-  Side side = Side::kLeft;
-  Triangle triangle = Triangle::kUpper;
-  Diagonal diagonal = Diagonal::kUnit;
-  size_t x_inc = 1;
-  size_t y_inc = 1;
-  size_t x_offset = 0;
-  size_t y_offset = 0;
-  size_t a_ld = 1;
-  size_t b_ld = 1;
-  size_t c_ld = 1;
-  size_t a_offset = 0;
-  size_t b_offset = 0;
-  size_t c_offset = 0;
-  size_t ap_offset = 0;
-  size_t dot_offset = 0;
-  size_t nrm2_offset = 0;
-  size_t asum_offset = 0;
-  size_t imax_offset = 0;
-  T alpha = ConstantOne<T>();
-  T beta = ConstantOne<T>();
-  size_t x_size = 1;
-  size_t y_size = 1;
-  size_t a_size = 1;
-  size_t b_size = 1;
-  size_t c_size = 1;
-  size_t ap_size = 1;
-  size_t scalar_size = 1;
-  // Tuner-specific arguments
-  double fraction = 1.0;
-  // Client-specific arguments
-  int compare_clblas = 1;
-  int compare_cblas = 1;
-  size_t step = 1;
-  size_t num_steps = 0;
-  size_t num_runs = 10;
-  // Common arguments
-  size_t platform_id = 0;
-  size_t device_id = 0;
-  Precision precision = Precision::kSingle;
-  bool print_help = false;
-  bool silent = false;
-  bool no_abbrv = false;
-};
-
-// Structure containing all possible buffers for test clients
-template <typename T>
-struct Buffers {
-  Buffer<T> x_vec;
-  Buffer<T> y_vec;
-  Buffer<T> a_mat;
-  Buffer<T> b_mat;
-  Buffer<T> c_mat;
-  Buffer<T> ap_mat;
-  Buffer<T> scalar;
-};
-
-// =================================================================================================
-
-// Converts a value (e.g. an integer) to a string. This also covers special cases for CLBlast
-// data-types such as the Layout and Transpose data-types.
-template <typename T>
-std::string ToString(T value);
-
-// =================================================================================================
-
-// Helper for the function "GetArgument"
-template <typename T>
-T ConvertArgument(const char* value);
-
-// Basic argument parser, matching patterns in the form of "-option value" and "--option value"
-template <typename T>
-T GetArgument(const int argc, char *argv[], std::string &help,
-              const std::string &option, const T default_value);
-
-// Returns the precision only
-Precision GetPrecision(const int argc, char *argv[],
-                       const Precision default_precision = Precision::kSingle);
-
-// As in "GetArgument", but now only checks whether an argument is given or not
-bool CheckArgument(const int argc, char *argv[], std::string &help, const std::string &option);
-
-// =================================================================================================
-
-// Helper function to check for errors in the status code
-constexpr bool ErrorIn(const StatusCode s) { return (s != StatusCode::kSuccess); }
-
-// =================================================================================================
-
-// Returns a random number to be used as a seed
-unsigned int GetRandomSeed();
-
-// Test/example data lower and upper limit
-constexpr auto kTestDataLowerLimit = -2.0;
-constexpr auto kTestDataUpperLimit = 2.0;
-
-// Populates a vector with random data
-template <typename T>
-void PopulateVector(std::vector<T> &vector);
-
-// =================================================================================================
-
-// Conversion between half and single-precision
-std::vector<float> HalfToFloatBuffer(const std::vector<half>& source);
-void FloatToHalfBuffer(std::vector<half>& result, const std::vector<float>& source);
-
-// As above, but now for OpenCL data-types instead of std::vectors
-Buffer<float> HalfToFloatBuffer(const Buffer<half>& source, cl_command_queue queue_raw);
-void FloatToHalfBuffer(Buffer<half>& result, const Buffer<float>& source, cl_command_queue queue_raw);
-
-// =================================================================================================
-
-// Rounding functions
-size_t CeilDiv(const size_t x, const size_t y);
-size_t Ceil(const size_t x, const size_t y);
-
-// Returns whether or not 'a' is a multiple of 'b'
-bool IsMultiple(const size_t a, const size_t b);
-
-// =================================================================================================
-
-// Convert the precision enum into bytes, e.g. a double takes up 8 bytes
-size_t GetBytes(const Precision precision);
-
-// Convert the template argument into a precision value
-template <typename T>
-Precision PrecisionValue();
-
-// =================================================================================================
-
-// Returns false if this precision is not supported by the device
-template <typename T>
-bool PrecisionSupported(const Device &device);
-
-// =================================================================================================
-} // namespace clblast
-
-// CLBLAST_UTILITIES_H_
-#endif
author	Cedric Nugteren <web@cedricnugteren.nl>	2016-06-18 20:20:13 +0200
committer	Cedric Nugteren <web@cedricnugteren.nl>	2016-06-18 20:20:13 +0200
commit	f726fbdc9fef937fbe32222f0e66aac8d7e2678c (patch)
tree	cb62cc877ea239052fb1882f7bf327aace3e7776 /include/internal
parent	bacb5d2bb2ea7b141034878090aca850db8f9d00 (diff)