summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorCNugteren <web@cedricnugteren.nl>2015-07-27 07:18:06 +0200
committerCNugteren <web@cedricnugteren.nl>2015-07-27 07:18:06 +0200
commitf7199b831f847340f0921ef2140a4e64809db037 (patch)
treed725b7e63b0662598ad4be0a4c2457820ded8ed4 /include
parentb10f4a633c4ffb3bb04d35503396ff94528df4d0 (diff)
Now using the new Claduc C++11 OpenCL header
Diffstat (limited to 'include')
-rw-r--r--include/internal/clpp11.h640
-rw-r--r--include/internal/database.h26
-rw-r--r--include/internal/database/copy.h40
-rw-r--r--include/internal/database/pad.h40
-rw-r--r--include/internal/database/padtranspose.h40
-rw-r--r--include/internal/database/transpose.h40
-rw-r--r--include/internal/database/xaxpy.h40
-rw-r--r--include/internal/database/xgemm.h48
-rw-r--r--include/internal/database/xgemv.h40
-rw-r--r--include/internal/routine.h23
-rw-r--r--include/internal/routines/level1/xaxpy.h20
-rw-r--r--include/internal/routines/level2/xgemv.h23
-rw-r--r--include/internal/routines/level3/xgemm.h25
-rw-r--r--include/internal/routines/level3/xhemm.h24
-rw-r--r--include/internal/routines/level3/xher2k.h25
-rw-r--r--include/internal/routines/level3/xherk.h22
-rw-r--r--include/internal/routines/level3/xsymm.h24
-rw-r--r--include/internal/routines/level3/xsyr2k.h25
-rw-r--r--include/internal/routines/level3/xsyrk.h22
-rw-r--r--include/internal/routines/level3/xtrmm.h22
-rw-r--r--include/internal/utilities.h11
21 files changed, 707 insertions, 513 deletions
diff --git a/include/internal/clpp11.h b/include/internal/clpp11.h
index d48b646d..2c2cc797 100644
--- a/include/internal/clpp11.h
+++ b/include/internal/clpp11.h
@@ -7,18 +7,17 @@
// Author(s):
// Cedric Nugteren <www.cedricnugteren.nl>
//
-// This file implements a C++11 wrapper around some OpenCL C data-types, similar to Khronos' cl.hpp.
-// The main differences are modern C++11 support and a straightforward implemenation of the basic
-// needs (as required for this project). It also includes some extra functionality not available
-// in cl.hpp, such as including the sources with a Program object and querying a Kernel's validity
-// in terms of local memory usage.
+// This file implements a bunch of C++11 classes that act as wrappers around OpenCL objects and API
+// calls. The main benefits are increased abstraction, automatic memory management, and portability.
+// Portability here means that a similar header exists for CUDA with the same classes and
+// interfaces. In other words, moving from the OpenCL API to the CUDA API becomes a one-line change.
//
-// This file is adapted from the C++ bindings from the CLTune project and therefore contains the
-// following copyright notice:
+// This file is taken from the Claduc project <https://github.com/CNugteren/Claduc> and therefore
+// contains the following header copyright notice:
//
// =================================================================================================
//
-// Copyright 2014 SURFsara
+// Copyright 2015 SURFsara
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -37,13 +36,15 @@
#ifndef CLBLAST_CLPP11_H_
#define CLBLAST_CLPP11_H_
-#include <utility> // std::swap
+// C++
#include <algorithm> // std::copy
-#include <string> // std::string
-#include <vector> // std::vector
+#include <string> // std::string
+#include <vector> // std::vector
+#include <memory> // std::shared_ptr
#include <stdexcept> // std::runtime_error
+#include <numeric> // std::accumulate
-// Includes the normal OpenCL C header
+// OpenCL
#if defined(__APPLE__) || defined(__MACOSX)
#include <OpenCL/opencl.h>
#else
@@ -53,59 +54,46 @@
namespace clblast {
// =================================================================================================
-// Base class for any object
-class Object {
- protected:
+// Error occurred in the C++11 OpenCL header (this file)
+inline void Error(const std::string &message) {
+ throw std::runtime_error("Internal OpenCL error: "+message);
+}
- // Error handling (NOTE: these functions are [[noreturn]])
- void Error(const std::string &message) const {
- throw std::runtime_error("Internal OpenCL error: "+message);
+// Error occurred in OpenCL
+inline void CheckError(const cl_int status) {
+ if (status != CL_SUCCESS) {
+ throw std::runtime_error("Internal OpenCL error: "+std::to_string(status));
}
- void Error(const cl_int status) const {
- throw std::runtime_error("Internal OpenCL error with status: "+std::to_string(status));
- }
-};
-
-// =================================================================================================
-
-// Base class for objects which require memory management
-class ObjectWithState: public Object {
-
-};
+}
// =================================================================================================
-// C++11 version of cl_event
-class Event: public Object {
+// C++11 version of 'cl_event'
+class Event {
public:
- // Constructor based on the plain C data-type
+ // Constructor based on the regular OpenCL data-type
explicit Event(const cl_event event): event_(event) { }
- // New event
- Event(): event_() {}
+ // Regular constructor
+ explicit Event() { }
- // Public functions
- size_t GetProfilingStart() const {
+ // Retrieves the elapsed time of the last recorded event. Note that no error checking is done on
+ // the 'clGetEventProfilingInfo' function, since there is a bug in Apple's OpenCL implementation:
+ // http://stackoverflow.com/questions/26145603/clgeteventprofilinginfo-bug-in-macosx
+ float GetElapsedTime() const {
+ CheckError(clWaitForEvents(1, &event_));
auto bytes = size_t{0};
clGetEventProfilingInfo(event_, CL_PROFILING_COMMAND_START, 0, nullptr, &bytes);
- auto result = size_t{0};
- clGetEventProfilingInfo(event_, CL_PROFILING_COMMAND_START, bytes, &result, nullptr);
- return result;
- }
- size_t GetProfilingEnd() const {
- auto bytes = size_t{0};
+ auto time_start = size_t{0};
+ clGetEventProfilingInfo(event_, CL_PROFILING_COMMAND_START, bytes, &time_start, nullptr);
clGetEventProfilingInfo(event_, CL_PROFILING_COMMAND_END, 0, nullptr, &bytes);
- auto result = size_t{0};
- clGetEventProfilingInfo(event_, CL_PROFILING_COMMAND_END, bytes, &result, nullptr);
- return result;
- }
- cl_int Wait() const {
- return clWaitForEvents(1, &event_);
+ auto time_end = size_t{0};
+ clGetEventProfilingInfo(event_, CL_PROFILING_COMMAND_END, bytes, &time_end, nullptr);
+ return (time_end - time_start) * 1.0e-6f;
}
- // Accessors to the private data-member
- cl_event operator()() const { return event_; }
+ // Accessor to the private data-member
cl_event& operator()() { return event_; }
private:
cl_event event_;
@@ -113,27 +101,25 @@ class Event: public Object {
// =================================================================================================
-// C++11 version of cl_platform_id
-class Platform: public Object {
+// C++11 version of 'cl_platform_id'
+class Platform {
public:
- // Constructor based on the plain C data-type
+ // Constructor based on the regular OpenCL data-type
explicit Platform(const cl_platform_id platform): platform_(platform) { }
- // Initialize the platform. Note that this constructor can throw exceptions!
+ // Initializes the platform
explicit Platform(const size_t platform_id) {
auto num_platforms = cl_uint{0};
- auto status = clGetPlatformIDs(0, nullptr, &num_platforms);
- if (status != CL_SUCCESS) { Error(status); }
+ CheckError(clGetPlatformIDs(0, nullptr, &num_platforms));
if (num_platforms == 0) { Error("no platforms found"); }
auto platforms = std::vector<cl_platform_id>(num_platforms);
- status = clGetPlatformIDs(num_platforms, platforms.data(), nullptr);
- if (status != CL_SUCCESS) { Error(status); }
+ CheckError(clGetPlatformIDs(num_platforms, platforms.data(), nullptr));
if (platform_id >= num_platforms) { Error("invalid platform ID "+std::to_string(platform_id)); }
platform_ = platforms[platform_id];
}
- // Accessors to the private data-member
+ // Accessor to the private data-member
const cl_platform_id& operator()() const { return platform_; }
private:
cl_platform_id platform_;
@@ -141,40 +127,53 @@ class Platform: public Object {
// =================================================================================================
-// C++11 version of cl_device_id
-class Device: public Object {
+// C++11 version of 'cl_device_id'
+class Device {
public:
- // Constructor based on the plain C data-type
+ // Constructor based on the regular OpenCL data-type
explicit Device(const cl_device_id device): device_(device) { }
// Initialize the device. Note that this constructor can throw exceptions!
- explicit Device(const Platform &platform, const cl_device_type type, const size_t device_id) {
+ explicit Device(const Platform &platform, const size_t device_id) {
auto num_devices = cl_uint{0};
- auto status = clGetDeviceIDs(platform(), type, 0, nullptr, &num_devices);
- if (status != CL_SUCCESS) { Error(status); }
+ CheckError(clGetDeviceIDs(platform(), CL_DEVICE_TYPE_ALL, 0, nullptr, &num_devices));
if (num_devices == 0) { Error("no devices found"); }
auto devices = std::vector<cl_device_id>(num_devices);
- status = clGetDeviceIDs(platform(), type, num_devices, devices.data(), nullptr);
- if (status != CL_SUCCESS) { Error(status); }
+ CheckError(clGetDeviceIDs(platform(), CL_DEVICE_TYPE_ALL, num_devices, devices.data(), nullptr));
if (device_id >= num_devices) { Error("invalid device ID "+std::to_string(device_id)); }
device_ = devices[device_id];
}
- // Public functions
- std::string Version() const { return GetInfoString(CL_DEVICE_VERSION); }
- cl_device_type Type() const { return GetInfo<cl_device_type>(CL_DEVICE_TYPE); }
- std::string Vendor() const { return GetInfoString(CL_DEVICE_VENDOR); }
- std::string Name() const { return GetInfoString(CL_DEVICE_NAME); }
- std::string Extensions() const { return GetInfoString(CL_DEVICE_EXTENSIONS); }
+ // Methods to retrieve device information
+ std::string Version() const { return GetInfoString(CL_DEVICE_VERSION); }
+ std::string Vendor() const { return GetInfoString(CL_DEVICE_VENDOR); }
+ std::string Name() const { return GetInfoString(CL_DEVICE_NAME); }
+ std::string Type() const {
+ auto type = GetInfo<cl_device_type>(CL_DEVICE_TYPE);
+ switch(type) {
+ case CL_DEVICE_TYPE_CPU: return "CPU";
+ case CL_DEVICE_TYPE_GPU: return "GPU";
+ case CL_DEVICE_TYPE_ACCELERATOR: return "accelerator";
+ default: return "default";
+ }
+ }
size_t MaxWorkGroupSize() const { return GetInfo<size_t>(CL_DEVICE_MAX_WORK_GROUP_SIZE); }
- cl_ulong LocalMemSize() const { return GetInfo<cl_ulong>(CL_DEVICE_LOCAL_MEM_SIZE); }
- cl_uint MaxWorkItemDimensions() const {
- return GetInfo<cl_uint>(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS);
+ size_t MaxWorkItemDimensions() const {
+ return GetInfo(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS);
}
std::vector<size_t> MaxWorkItemSizes() const {
return GetInfoVector<size_t>(CL_DEVICE_MAX_WORK_ITEM_SIZES);
}
+ size_t LocalMemSize() const {
+ return static_cast<size_t>(GetInfo<cl_ulong>(CL_DEVICE_LOCAL_MEM_SIZE));
+ }
+ std::string Capabilities() const { return GetInfoString(CL_DEVICE_EXTENSIONS); }
+ size_t CoreClock() const { return GetInfo(CL_DEVICE_MAX_CLOCK_FREQUENCY); }
+ size_t ComputeUnits() const { return GetInfo(CL_DEVICE_MAX_COMPUTE_UNITS); }
+ size_t MemorySize() const { return GetInfo(CL_DEVICE_GLOBAL_MEM_SIZE); }
+ size_t MemoryClock() const { return 0; } // Not exposed in OpenCL
+ size_t MemoryBusWidth() const { return 0; } // Not exposed in OpenCL
// Configuration-validity checks
bool IsLocalMemoryValid(const size_t local_mem_usage) const {
@@ -182,7 +181,7 @@ class Device: public Object {
}
bool IsThreadConfigValid(const std::vector<size_t> &local) const {
auto local_size = size_t{1};
- for (auto &item: local) { local_size *= item; }
+ for (const auto &item: local) { local_size *= item; }
for (auto i=size_t{0}; i<local.size(); ++i) {
if (local[i] > MaxWorkItemSizes()[i]) { return false; }
}
@@ -191,313 +190,404 @@ class Device: public Object {
return true;
}
- // Accessors to the private data-member
+ // Accessor to the private data-member
const cl_device_id& operator()() const { return device_; }
private:
+ cl_device_id device_;
- // Helper functions
+ // Private helper functions
template <typename T>
T GetInfo(const cl_device_info info) const {
auto bytes = size_t{0};
- clGetDeviceInfo(device_, info, 0, nullptr, &bytes);
+ CheckError(clGetDeviceInfo(device_, info, 0, nullptr, &bytes));
auto result = T(0);
- clGetDeviceInfo(device_, info, bytes, &result, nullptr);
+ CheckError(clGetDeviceInfo(device_, info, bytes, &result, nullptr));
return result;
}
+ size_t GetInfo(const cl_device_info info) const {
+ auto bytes = size_t{0};
+ CheckError(clGetDeviceInfo(device_, info, 0, nullptr, &bytes));
+ auto result = cl_uint(0);
+ CheckError(clGetDeviceInfo(device_, info, bytes, &result, nullptr));
+ return static_cast<size_t>(result);
+ }
template <typename T>
std::vector<T> GetInfoVector(const cl_device_info info) const {
auto bytes = size_t{0};
- clGetDeviceInfo(device_, info, 0, nullptr, &bytes);
+ CheckError(clGetDeviceInfo(device_, info, 0, nullptr, &bytes));
auto result = std::vector<T>(bytes/sizeof(T));
- clGetDeviceInfo(device_, info, bytes, result.data(), nullptr);
+ CheckError(clGetDeviceInfo(device_, info, bytes, result.data(), nullptr));
return result;
}
std::string GetInfoString(const cl_device_info info) const {
auto bytes = size_t{0};
- clGetDeviceInfo(device_, info, 0, nullptr, &bytes);
- auto result = std::vector<char>(bytes);
- clGetDeviceInfo(device_, info, bytes, result.data(), nullptr);
- return std::string(result.data());
+ CheckError(clGetDeviceInfo(device_, info, 0, nullptr, &bytes));
+ auto result = std::string{};
+ result.resize(bytes);
+ CheckError(clGetDeviceInfo(device_, info, bytes, &result[0], nullptr));
+ return std::string{result.c_str()};
}
-
- cl_device_id device_;
};
// =================================================================================================
-// C++11 version of cl_context
-class Context: public ObjectWithState {
+// C++11 version of 'cl_context'
+class Context {
public:
- // Constructor based on the plain C data-type
- explicit Context(const cl_context context): context_(context) {
- clRetainContext(context_);
+ // Constructor based on the regular OpenCL data-type: memory management is handled elsewhere
+ explicit Context(const cl_context context):
+ context_(new cl_context) {
+ *context_ = context;
}
- // Memory management
- explicit Context(const Device &device) {
+ // Regular constructor with memory management
+ explicit Context(const Device &device):
+ context_(new cl_context, [](cl_context* c) { CheckError(clReleaseContext(*c)); delete c; }) {
auto status = CL_SUCCESS;
const cl_device_id dev = device();
- context_ = clCreateContext(nullptr, 1, &dev, nullptr, nullptr, &status);
- if (status != CL_SUCCESS) { Error(status); }
- }
- ~Context() {
- clReleaseContext(context_);
- }
- Context(const Context &other):
- context_(other.context_) {
- clRetainContext(context_);
- }
- Context& operator=(Context other) {
- swap(*this, other);
- return *this;
- }
- friend void swap(Context &first, Context &second) {
- std::swap(first.context_, second.context_);
+ *context_ = clCreateContext(nullptr, 1, &dev, nullptr, nullptr, &status);
+ CheckError(status);
}
- // Accessors to the private data-member
- const cl_context& operator()() const { return context_; }
+ // Accessor to the private data-member
+ const cl_context& operator()() const { return *context_; }
private:
- cl_context context_;
+ std::shared_ptr<cl_context> context_;
};
// =================================================================================================
-// C++11 version of cl_program. Additionally holds the program's source code.
-class Program: public ObjectWithState {
- public:
-
- // Note that there is no constructor based on the plain C data-type because of extra state
+// Enumeration of build statuses of the run-time compilation process
+enum class BuildStatus { kSuccess, kError, kInvalid };
- // Memory management
- explicit Program(const Context &context, const std::string &source):
- length_(source.length()) {
- std::copy(source.begin(), source.end(), back_inserter(source_));
- source_ptr_ = source_.data();
- auto status = CL_SUCCESS;
- program_ = clCreateProgramWithSource(context(), 1, &source_ptr_, &length_, &status);
- if (status != CL_SUCCESS) { Error(status); }
- }
- ~Program() {
- clReleaseProgram(program_);
- }
- Program(const Program &other):
- length_(other.length_),
- source_(other.source_),
- source_ptr_(other.source_ptr_),
- program_(other.program_) {
- clRetainProgram(program_);
- }
- Program& operator=(Program other) {
- swap(*this, other);
- return *this;
- }
- friend void swap(Program &first, Program &second) {
- std::swap(first.length_, second.length_);
- std::swap(first.source_, second.source_);
- std::swap(first.source_ptr_, second.source_ptr_);
- std::swap(first.program_, second.program_);
+// C++11 version of 'cl_program'. Additionally holds the program's source code.
+class Program {
+ public:
+ // Note that there is no constructor based on the regular OpenCL data-type because of extra state
+
+ // Regular constructor with memory management
+ explicit Program(const Context &context, std::string source):
+ program_(new cl_program, [](cl_program* p) { CheckError(clReleaseProgram(*p)); delete p; }),
+ length_(source.length()),
+ source_(std::move(source)),
+ source_ptr_(&source_[0]) {
+ auto status = CL_SUCCESS;
+ *program_ = clCreateProgramWithSource(context(), 1, &source_ptr_, &length_, &status);
+ CheckError(status);
}
- // Public functions
- cl_int Build(const Device &device, const std::string &options) {
+ // Compiles the device program and returns whether or not there were any warnings/errors
+ BuildStatus Build(const Device &device, std::vector<std::string> &options) {
+ auto options_string = std::accumulate(options.begin(), options.end(), std::string{" "});
const cl_device_id dev = device();
- return clBuildProgram(program_, 1, &dev, options.c_str(), nullptr, nullptr);
+ auto status = clBuildProgram(*program_, 1, &dev, options_string.c_str(), nullptr, nullptr);
+ if (status == CL_BUILD_PROGRAM_FAILURE) {
+ return BuildStatus::kError;
+ }
+ else if (status == CL_INVALID_BINARY) {
+ return BuildStatus::kInvalid;
+ }
+ else {
+ CheckError(status);
+ return BuildStatus::kSuccess;
+ }
}
+
+ // Retrieves the warning/error message from the compiler (if any)
std::string GetBuildInfo(const Device &device) const {
auto bytes = size_t{0};
- clGetProgramBuildInfo(program_, device(), CL_PROGRAM_BUILD_LOG, 0, nullptr, &bytes);
- auto result = std::vector<char>(bytes);
- clGetProgramBuildInfo(program_, device(), CL_PROGRAM_BUILD_LOG, bytes, result.data(), nullptr);
- return std::string(result.data());
+ auto query = cl_program_build_info{CL_PROGRAM_BUILD_LOG};
+ CheckError(clGetProgramBuildInfo(*program_, device(), query, 0, nullptr, &bytes));
+ auto result = std::string{};
+ result.resize(bytes);
+ CheckError(clGetProgramBuildInfo(*program_, device(), query, bytes, &result[0], nullptr));
+ return result;
+ }
+
+ // Retrieves an intermediate representation of the compiled program
+ std::string GetIR() const {
+ auto bytes = size_t{0};
+ CheckError(clGetProgramInfo(*program_, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &bytes, nullptr));
+ auto result = std::string{};
+ result.resize(bytes);
+ auto result_ptr = result.data();
+ CheckError(clGetProgramInfo(*program_, CL_PROGRAM_BINARIES, sizeof(char*), &result_ptr, nullptr));
+ return result;
}
- // Accessors to the private data-member
- const cl_program& operator()() const { return program_; }
+ // Accessor to the private data-member
+ const cl_program& operator()() const { return *program_; }
private:
+ std::shared_ptr<cl_program> program_;
size_t length_;
- std::vector<char> source_;
+ std::string source_;
const char* source_ptr_;
- cl_program program_;
};
// =================================================================================================
-// C++11 version of cl_kernel
-class Kernel: public ObjectWithState {
+// C++11 version of 'cl_command_queue'
+class Queue {
public:
- // Constructor based on the plain C data-type
- explicit Kernel(const cl_kernel kernel): kernel_(kernel) {
- clRetainKernel(kernel_);
+ // Constructor based on the regular OpenCL data-type: memory management is handled elsewhere
+ explicit Queue(const cl_command_queue queue):
+ queue_(new cl_command_queue) {
+ *queue_ = queue;
}
- // Memory management
- explicit Kernel(const Program &program, const std::string &name) {
+ // Regular constructor with memory management
+ explicit Queue(const Context &context, const Device &device):
+ queue_(new cl_command_queue, [](cl_command_queue* s) { CheckError(clReleaseCommandQueue(*s));
+ delete s; }) {
auto status = CL_SUCCESS;
- kernel_ = clCreateKernel(program(), name.c_str(), &status);
- if (status != CL_SUCCESS) { Error(status); }
+ *queue_ = clCreateCommandQueue(context(), device(), CL_QUEUE_PROFILING_ENABLE, &status);
+ CheckError(status);
}
- ~Kernel() {
- clReleaseKernel(kernel_);
+
+ // Synchronizes the queue
+ void Finish(Event &) const {
+ Finish();
}
- Kernel(const Kernel &other):
- kernel_(other.kernel_) {
- clRetainKernel(kernel_);
+ void Finish() const {
+ CheckError(clFinish(*queue_));
}
- Kernel& operator=(Kernel other) {
- swap(*this, other);
- return *this;
+
+ // Retrieves the corresponding context or device
+ Context GetContext() const {
+ auto bytes = size_t{0};
+ CheckError(clGetCommandQueueInfo(*queue_, CL_QUEUE_CONTEXT, 0, nullptr, &bytes));
+ cl_context result;
+ CheckError(clGetCommandQueueInfo(*queue_, CL_QUEUE_CONTEXT, bytes, &result, nullptr));
+ return Context(result);
}
- friend void swap(Kernel &first, Kernel &second) {
- std::swap(first.kernel_, second.kernel_);
+ Device GetDevice() const {
+ auto bytes = size_t{0};
+ CheckError(clGetCommandQueueInfo(*queue_, CL_QUEUE_DEVICE, 0, nullptr, &bytes));
+ cl_device_id result;
+ CheckError(clGetCommandQueueInfo(*queue_, CL_QUEUE_DEVICE, bytes, &result, nullptr));
+ return Device(result);
}
- // Public functions
- template <typename T> // Note: doesn't work with T=Buffer
- cl_int SetArgument(const cl_uint index, const T &value) {
- return clSetKernelArg(kernel_, index, sizeof(T), &value);
+ // Accessor to the private data-member
+ const cl_command_queue& operator()() const { return *queue_; }
+ private:
+ std::shared_ptr<cl_command_queue> queue_;
+};
+
+// =================================================================================================
+
+// C++11 version of host memory
+template <typename T>
+class BufferHost {
+ public:
+
+ // Regular constructor with memory management
+ explicit BufferHost(const Context &, const size_t size):
+ buffer_(new std::vector<T>(size)) {
}
- size_t LocalMemUsage(const Device &device) const {
- auto bytes = size_t{0};
- clGetKernelWorkGroupInfo(kernel_, device(), CL_KERNEL_LOCAL_MEM_SIZE, 0, nullptr, &bytes);
- auto result = size_t{0};
- clGetKernelWorkGroupInfo(kernel_, device(), CL_KERNEL_LOCAL_MEM_SIZE, bytes, &result, nullptr);
- return result;
+
+ // Retrieves the actual allocated size in bytes
+ size_t GetSize() const {
+ return buffer_->size()*sizeof(T);
}
- // Accessors to the private data-member
- const cl_kernel& operator()() const { return kernel_; }
+ // Compatibility with std::vector
+ size_t size() const { return buffer_->size(); }
+ T* begin() { return &(*buffer_)[0]; }
+ T* end() { return &(*buffer_)[buffer_->size()-1]; }
+ T& operator[](const size_t i) { return (*buffer_)[i]; }
+ T* data() { return buffer_->data(); }
+ const T* data() const { return buffer_->data(); }
+
private:
- cl_kernel kernel_;
+ std::shared_ptr<std::vector<T>> buffer_;
};
// =================================================================================================
-// C++11 version of cl_command_queue
-class CommandQueue: public ObjectWithState {
+// Enumeration of buffer access types
+enum class BufferAccess { kReadOnly, kWriteOnly, kReadWrite };
+
+// C++11 version of 'cl_mem'
+template <typename T>
+class Buffer {
public:
- // Constructor based on the plain C data-type
- explicit CommandQueue(const cl_command_queue queue): queue_(queue) {
- clRetainCommandQueue(queue_);
+ // Constructor based on the regular OpenCL data-type: memory management is handled elsewhere
+ explicit Buffer(const cl_mem buffer):
+ buffer_(new cl_mem),
+ access_(BufferAccess::kReadWrite) {
+ *buffer_ = buffer;
}
- // Memory management
- explicit CommandQueue(const Context &context, const Device &device) {
+ // Regular constructor with memory management
+ explicit Buffer(const Context &context, const BufferAccess access, const size_t size):
+ buffer_(new cl_mem, [](cl_mem* m) { CheckError(clReleaseMemObject(*m)); delete m; }),
+ access_(access) {
+ auto flags = cl_mem_flags{CL_MEM_READ_WRITE};
+ if (access_ == BufferAccess::kReadOnly) { flags = CL_MEM_READ_ONLY; }
+ if (access_ == BufferAccess::kWriteOnly) { flags = CL_MEM_WRITE_ONLY; }
auto status = CL_SUCCESS;
- queue_ = clCreateCommandQueue(context(), device(), CL_QUEUE_PROFILING_ENABLE, &status);
- if (status != CL_SUCCESS) { Error(status); }
+ *buffer_ = clCreateBuffer(context(), flags, size*sizeof(T), nullptr, &status);
+ CheckError(status);
}
- ~CommandQueue() {
- clReleaseCommandQueue(queue_);
+
+ // As above, but now with read/write access as a default
+ explicit Buffer(const Context &context, const size_t size):
+ Buffer<T>(context, BufferAccess::kReadWrite, size) {
}
- CommandQueue(const CommandQueue &other):
- queue_(other.queue_) {
- clRetainCommandQueue(queue_);
+
+ // Copies from device to host: reading the device buffer asynchronously
+ void ReadAsync(const Queue &queue, const size_t size, T* host) {
+ if (access_ == BufferAccess::kWriteOnly) { Error("reading from a write-only buffer"); }
+ CheckError(clEnqueueReadBuffer(queue(), *buffer_, CL_FALSE, 0, size*sizeof(T), host, 0,
+ nullptr, nullptr));
}
- CommandQueue& operator=(CommandQueue other) {
- swap(*this, other);
- return *this;
+ void ReadAsync(const Queue &queue, const size_t size, std::vector<T> &host) {
+ if (host.size() < size) { Error("target host buffer is too small"); }
+ ReadAsync(queue, size, host.data());
}
- friend void swap(CommandQueue &first, CommandQueue &second) {
- std::swap(first.queue_, second.queue_);
+ void ReadAsync(const Queue &queue, const size_t size, BufferHost<T> &host) {
+ if (host.size() < size) { Error("target host buffer is too small"); }
+ ReadAsync(queue, size, host.data());
}
- // Public functions
- cl_int EnqueueKernel(const Kernel &kernel, const std::vector<size_t> &global,
- const std::vector<size_t> &local, Event &event) {
- return clEnqueueNDRangeKernel(queue_, kernel(), static_cast<cl_uint>(global.size()), nullptr,
- global.data(), local.data(), 0, nullptr, &(event()));
+ // Copies from device to host: reading the device buffer
+ void Read(const Queue &queue, const size_t size, T* host) {
+ ReadAsync(queue, size, host);
+ queue.Finish();
}
- Context GetContext() const {
- auto bytes = size_t{0};
- clGetCommandQueueInfo(queue_, CL_QUEUE_CONTEXT, 0, nullptr, &bytes);
- cl_context result;
- clGetCommandQueueInfo(queue_, CL_QUEUE_CONTEXT, bytes, &result, nullptr);
- return Context(result);
+ void Read(const Queue &queue, const size_t size, std::vector<T> &host) {
+ Read(queue, size, host.data());
}
- Device GetDevice() const {
- auto bytes = size_t{0};
- clGetCommandQueueInfo(queue_, CL_QUEUE_DEVICE, 0, nullptr, &bytes);
- cl_device_id result;
- clGetCommandQueueInfo(queue_, CL_QUEUE_DEVICE, bytes, &result, nullptr);
- return Device(result);
+ void Read(const Queue &queue, const size_t size, BufferHost<T> &host) {
+ Read(queue, size, host.data());
+ }
+
+ // Copies from host to device: writing the device buffer asynchronously
+ void WriteAsync(const Queue &queue, const size_t size, const T* host) {
+ if (access_ == BufferAccess::kReadOnly) { Error("writing to a read-only buffer"); }
+ if (GetSize() < size*sizeof(T)) { Error("target device buffer is too small"); }
+ CheckError(clEnqueueWriteBuffer(queue(), *buffer_, CL_FALSE, 0, size*sizeof(T), host, 0,
+ nullptr, nullptr));
+ }
+ void WriteAsync(const Queue &queue, const size_t size, const std::vector<T> &host) {
+ WriteAsync(queue, size, host.data());
}
- cl_int Finish() {
- return clFinish(queue_);
+ void WriteAsync(const Queue &queue, const size_t size, const BufferHost<T> &host) {
+ WriteAsync(queue, size, host.data());
}
- // Accessors to the private data-member
- const cl_command_queue& operator()() const { return queue_; }
+ // Copies from host to device: writing the device buffer
+ void Write(const Queue &queue, const size_t size, const T* host) {
+ WriteAsync(queue, size, host);
+ queue.Finish();
+ }
+ void Write(const Queue &queue, const size_t size, const std::vector<T> &host) {
+ Write(queue, size, host.data());
+ }
+ void Write(const Queue &queue, const size_t size, const BufferHost<T> &host) {
+ Write(queue, size, host.data());
+ }
+
+ // Copies the contents of this buffer into another device buffer
+ void CopyToAsync(const Queue &queue, const size_t size, const Buffer<T> &destination) {
+ CheckError(clEnqueueCopyBuffer(queue(), *buffer_, destination(), 0, 0, size*sizeof(T), 0,
+ nullptr, nullptr));
+ }
+ void CopyTo(const Queue &queue, const size_t size, const Buffer<T> &destination) {
+ CopyToAsync(queue, size, destination);
+ queue.Finish();
+ }
+
+ // Retrieves the actual allocated size in bytes
+ size_t GetSize() const {
+ auto bytes = size_t{0};
+ CheckError(clGetMemObjectInfo(*buffer_, CL_MEM_SIZE, 0, nullptr, &bytes));
+ auto result = size_t{0};
+ CheckError(clGetMemObjectInfo(*buffer_, CL_MEM_SIZE, bytes, &result, nullptr));
+ return result;
+ }
+
+ // Accessor to the private data-member
+ const cl_mem& operator()() const { return *buffer_; }
private:
- cl_command_queue queue_;
+ std::shared_ptr<cl_mem> buffer_;
+ const BufferAccess access_;
};
// =================================================================================================
-// C++11 version of cl_mem
-class Buffer: public ObjectWithState {
+// C++11 version of 'cl_kernel'
+class Kernel {
public:
- // Constructor based on the plain C data-type
- explicit Buffer(const cl_mem buffer): buffer_(buffer) {
- clRetainMemObject(buffer_);
+ // Constructor based on the regular OpenCL data-type: memory management is handled elsewhere
+ explicit Kernel(const cl_kernel kernel):
+ kernel_(new cl_kernel) {
+ *kernel_ = kernel;
}
- // Memory management
- explicit Buffer(const Context &context, const cl_mem_flags flags, const size_t bytes) {
+ // Regular constructor with memory management
+ explicit Kernel(const Program &program, const std::string &name):
+ kernel_(new cl_kernel, [](cl_kernel* k) { CheckError(clReleaseKernel(*k)); delete k; }) {
auto status = CL_SUCCESS;
- buffer_ = clCreateBuffer(context(), flags, bytes, nullptr, &status);
- if (status != CL_SUCCESS) { Error(status); }
- }
- ~Buffer() {
- clReleaseMemObject(buffer_);
- }
- Buffer(const Buffer &other):
- buffer_(other.buffer_) {
- clRetainMemObject(buffer_);
- }
- Buffer& operator=(Buffer other) {
- swap(*this, other);
- return *this;
- }
- friend void swap(Buffer &first, Buffer &second) {
- std::swap(first.buffer_, second.buffer_);
+ *kernel_ = clCreateKernel(program(), name.c_str(), &status);
+ CheckError(status);
}
- // Public functions
+ // Sets a kernel argument at the indicated position
template <typename T>
- cl_int ReadBuffer(const CommandQueue &queue, const size_t bytes, T* host) {
- return clEnqueueReadBuffer(queue(), buffer_, CL_TRUE, 0, bytes, host, 0, nullptr, nullptr);
+ void SetArgument(const size_t index, const T &value) {
+ CheckError(clSetKernelArg(*kernel_, static_cast<cl_uint>(index), sizeof(T), &value));
}
template <typename T>
- cl_int ReadBuffer(const CommandQueue &queue, const size_t bytes, std::vector<T> &host) {
- return ReadBuffer(queue, bytes, host.data());
+ void SetArgument(const size_t index, Buffer<T> &value) {
+ SetArgument(index, value());
}
- template <typename T>
- cl_int WriteBuffer(const CommandQueue &queue, const size_t bytes, const T* host) {
- return clEnqueueWriteBuffer(queue(), buffer_, CL_TRUE, 0, bytes, host, 0, nullptr, nullptr);
- }
- template <typename T>
- cl_int WriteBuffer(const CommandQueue &queue, const size_t bytes, const std::vector<T> &host) {
- return WriteBuffer(queue, bytes, &host[0]);
+
+ // Sets all arguments in one go using parameter packs. Note that this overwrites previously set
+ // arguments using 'SetArgument' or 'SetArguments'.
+ template <typename... Args>
+ void SetArguments(Args&... args) {
+ SetArgumentsRecursive(0, args...);
}
- size_t GetSize() const {
+
+ // Retrieves the amount of local memory used per work-group for this kernel
+ size_t LocalMemUsage(const Device &device) const {
auto bytes = size_t{0};
- auto status = clGetMemObjectInfo(buffer_, CL_MEM_SIZE, 0, nullptr, &bytes);
- if (status != CL_SUCCESS) { Error(status); }
+ auto query = cl_kernel_work_group_info{CL_KERNEL_LOCAL_MEM_SIZE};
+ CheckError(clGetKernelWorkGroupInfo(*kernel_, device(), query, 0, nullptr, &bytes));
auto result = size_t{0};
- status = clGetMemObjectInfo(buffer_, CL_MEM_SIZE, bytes, &result, nullptr);
- if (status != CL_SUCCESS) { Error(status); }
+ CheckError(clGetKernelWorkGroupInfo(*kernel_, device(), query, bytes, &result, nullptr));
return result;
}
- // Accessors to the private data-member
- const cl_mem& operator()() const { return buffer_; }
+ // Launches a kernel onto the specified queue
+ void Launch(const Queue &queue, const std::vector<size_t> &global,
+ const std::vector<size_t> &local, Event &event) {
+ CheckError(clEnqueueNDRangeKernel(queue(), *kernel_, static_cast<cl_uint>(global.size()),
+ nullptr, global.data(), local.data(),
+ 0, nullptr, &(event())));
+ }
+
+ // Accessor to the private data-member
+ const cl_kernel& operator()() const { return *kernel_; }
private:
- cl_mem buffer_;
+ std::shared_ptr<cl_kernel> kernel_;
+
+ // Internal implementation for the recursive SetArguments function.
+ template <typename T>
+ void SetArgumentsRecursive(const size_t index, T &first) {
+ SetArgument(index, first);
+ }
+ template <typename T, typename... Args>
+ void SetArgumentsRecursive(const size_t index, T &first, Args&... args) {
+ SetArgument(index, first);
+ SetArgumentsRecursive(index+1, args...);
+ }
};
// =================================================================================================
diff --git a/include/internal/database.h b/include/internal/database.h
index 33ad1979..8c937e34 100644
--- a/include/internal/database.h
+++ b/include/internal/database.h
@@ -39,7 +39,7 @@ class Database {
const Parameters parameters;
};
struct DatabaseVendor {
- const cl_device_type type;
+ const std::string type;
const std::string name;
const std::vector<DatabaseDevice> devices;
};
@@ -49,8 +49,21 @@ class Database {
const std::vector<DatabaseVendor> vendors;
};
- // The default vendor or device
- static constexpr auto kDefault = "Default";
+ // The OpenCL device types
+ static constexpr auto kDeviceTypeCPU = "CPU";
+ static constexpr auto kDeviceTypeGPU = "GPU";
+ static constexpr auto kDeviceTypeAccelerator = "accelerator";
+ static constexpr auto kDeviceTypeAll = "default";
+
+ // The OpenCL device vendors
+ static constexpr auto kDeviceVendorNVIDIA = "NVIDIA Corporation";
+ static constexpr auto kDeviceVendorAMD = "Advanced Micro Devices, Inc.";
+ static constexpr auto kDeviceVendorIntel = "Intel";
+ static constexpr auto kDeviceVendorAll = "default";
+
+ // The OpenCL device names
+ static constexpr auto kDefaultDevice = "default";
+
// The database consists of separate database entries, stored together in a vector
static const DatabaseEntry XaxpySingle, XaxpyDouble, XaxpyComplexSingle, XaxpyComplexDouble;
@@ -63,7 +76,7 @@ class Database {
static const std::vector<DatabaseEntry> database;
// The constructor
- explicit Database(const CommandQueue &queue, const std::vector<std::string> &routines,
+ explicit Database(const Queue &queue, const std::vector<std::string> &routines,
const Precision precision);
// Accessor of values by key
@@ -73,13 +86,10 @@ class Database {
std::string GetDefines() const;
private:
- Parameters Search(const std::string &this_kernel, const cl_device_type this_type,
+ Parameters Search(const std::string &this_kernel, const std::string &this_type,
const std::string &this_vendor, const std::string &this_device,
const Precision this_precision) const;
- // Tests equality between a database-vendor string and an OpenCL vendor string
- bool VendorEqual(const std::string &db_vendor, const std::string &cl_vendor) const;
-
// Found parameters suitable for this device/kernel
Parameters parameters_;
};
diff --git a/include/internal/database/copy.h b/include/internal/database/copy.h
index dfd69b80..541a352b 100644
--- a/include/internal/database/copy.h
+++ b/include/internal/database/copy.h
@@ -17,25 +17,25 @@ namespace clblast {
const Database::DatabaseEntry Database::CopySingle = {
"Copy", Precision::kSingle, {
{ // NVIDIA GPUs
- CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", {
+ kDeviceTypeGPU, kDeviceVendorNVIDIA, {
{ "GeForce GTX 480", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",2} } },
{ "Tesla K20m", { {"COPY_DIMX",8}, {"COPY_DIMY",16}, {"COPY_WPT",2}, {"COPY_VW",4} } },
{ "Tesla K40m", { {"COPY_DIMX",16}, {"COPY_DIMY",16}, {"COPY_WPT",4}, {"COPY_VW",4} } },
}
},
{ // AMD GPUs
- CL_DEVICE_TYPE_GPU, "Advanced Micro Devices, Inc.", {
+ kDeviceTypeGPU, kDeviceVendorAMD, {
{ "Tahiti", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_WPT",4}, {"COPY_VW",2} } },
}
},
{ // Intel GPUs
- CL_DEVICE_TYPE_GPU, "Intel", {
+ kDeviceTypeGPU, kDeviceVendorIntel, {
{ "Iris", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",4} } },
}
},
{ // Default
- CL_DEVICE_TYPE_ALL, kDefault, {
- { kDefault, { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",1} } },
+ kDeviceTypeAll, kDeviceVendorAll, {
+ { kDefaultDevice, { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",1} } },
}
},
}
@@ -46,24 +46,24 @@ const Database::DatabaseEntry Database::CopySingle = {
const Database::DatabaseEntry Database::CopyDouble = {
"Copy", Precision::kDouble, {
{ // NVIDIA GPUs
- CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", {
+ kDeviceTypeGPU, kDeviceVendorNVIDIA, {
{ "GeForce GTX 480", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",1} } },
{ "Tesla K20m", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",2} } },
{ "Tesla K40m", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",2} } },
}
},
{ // AMD GPUs
- CL_DEVICE_TYPE_GPU, "Advanced Micro Devices, Inc.", {
+ kDeviceTypeGPU, kDeviceVendorAMD, {
{ "Tahiti", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_WPT",2}, {"COPY_VW",4} } },
}
},
{ // Intel GPUs
- CL_DEVICE_TYPE_GPU, "Intel", {
+ kDeviceTypeGPU, kDeviceVendorIntel, {
}
},
{ // Default
- CL_DEVICE_TYPE_ALL, kDefault, {
- { kDefault, { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",1} } },
+ kDeviceTypeAll, kDeviceVendorAll, {
+ { kDefaultDevice, { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",1} } },
}
},
}
@@ -74,25 +74,25 @@ const Database::DatabaseEntry Database::CopyDouble = {
const Database::DatabaseEntry Database::CopyComplexSingle = {
"Copy", Precision::kComplexSingle, {
{ // NVIDIA GPUs
- CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", {
+ kDeviceTypeGPU, kDeviceVendorNVIDIA, {
{ "GeForce GTX 480", { {"COPY_DIMX",16}, {"COPY_DIMY",16}, {"COPY_WPT",1}, {"COPY_VW",1} } },
{ "Tesla K20m", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_WPT",2}, {"COPY_VW",1} } },
{ "Tesla K40m", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",1} } },
}
},
{ // AMD GPUs
- CL_DEVICE_TYPE_GPU, "Advanced Micro Devices, Inc.", {
+ kDeviceTypeGPU, kDeviceVendorAMD, {
{ "Tahiti", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",1} } },
}
},
{ // Intel GPUs
- CL_DEVICE_TYPE_GPU, "Intel", {
+ kDeviceTypeGPU, kDeviceVendorIntel, {
{ "Iris", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",1} } },
}
},
{ // Default
- CL_DEVICE_TYPE_ALL, kDefault, {
- { kDefault, { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",1} } },
+ kDeviceTypeAll, kDeviceVendorAll, {
+ { kDefaultDevice, { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",1} } },
}
},
}
@@ -103,24 +103,24 @@ const Database::DatabaseEntry Database::CopyComplexSingle = {
const Database::DatabaseEntry Database::CopyComplexDouble = {
"Copy", Precision::kComplexDouble, {
{ // NVIDIA GPUs
- CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", {
+ kDeviceTypeGPU, kDeviceVendorNVIDIA, {
{ "GeForce GTX 480", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",1} } },
{ "Tesla K20m", { {"COPY_DIMX",8}, {"COPY_DIMY",32}, {"COPY_WPT",1}, {"COPY_VW",1} } },
{ "Tesla K40m", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",1} } },
}
},
{ // AMD GPUs
- CL_DEVICE_TYPE_GPU, "Advanced Micro Devices, Inc.", {
+ kDeviceTypeGPU, kDeviceVendorAMD, {
{ "Tahiti", { {"COPY_DIMX",8}, {"COPY_DIMY",32}, {"COPY_WPT",4}, {"COPY_VW",2} } },
}
},
{ // Intel GPUs
- CL_DEVICE_TYPE_GPU, "Intel", {
+ kDeviceTypeGPU, kDeviceVendorIntel, {
}
},
{ // Default
- CL_DEVICE_TYPE_ALL, kDefault, {
- { kDefault, { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",1} } },
+ kDeviceTypeAll, kDeviceVendorAll, {
+ { kDefaultDevice, { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",1} } },
}
},
}
diff --git a/include/internal/database/pad.h b/include/internal/database/pad.h
index 61ec3242..4a599648 100644
--- a/include/internal/database/pad.h
+++ b/include/internal/database/pad.h
@@ -17,25 +17,25 @@ namespace clblast {
const Database::DatabaseEntry Database::PadSingle = {
"Pad", Precision::kSingle, {
{ // NVIDIA GPUs
- CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", {
+ kDeviceTypeGPU, kDeviceVendorNVIDIA, {
{ "GeForce GTX 480", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",4} } },
{ "Tesla K20m", { {"PAD_DIMX",16}, {"PAD_DIMY",32}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
{ "Tesla K40m", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } },
}
},
{ // AMD GPUs
- CL_DEVICE_TYPE_GPU, "Advanced Micro Devices, Inc.", {
+ kDeviceTypeGPU, kDeviceVendorAMD, {
{ "Tahiti", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } },
}
},
{ // Intel GPUs
- CL_DEVICE_TYPE_GPU, "Intel", {
+ kDeviceTypeGPU, kDeviceVendorIntel, {
{ "Iris", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } },
}
},
{ // Default
- CL_DEVICE_TYPE_ALL, kDefault, {
- { kDefault, { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
+ kDeviceTypeAll, kDeviceVendorAll, {
+ { kDefaultDevice, { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
}
},
}
@@ -46,24 +46,24 @@ const Database::DatabaseEntry Database::PadSingle = {
const Database::DatabaseEntry Database::PadDouble = {
"Pad", Precision::kDouble, {
{ // NVIDIA GPUs
- CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", {
+ kDeviceTypeGPU, kDeviceVendorNVIDIA, {
{ "GeForce GTX 480", { {"PAD_DIMX",16}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
{ "Tesla K20m", { {"PAD_DIMX",16}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
{ "Tesla K40m", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
}
},
{ // AMD GPUs
- CL_DEVICE_TYPE_GPU, "Advanced Micro Devices, Inc.", {
+ kDeviceTypeGPU, kDeviceVendorAMD, {
{ "Tahiti", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } },
}
},
{ // Intel GPUs
- CL_DEVICE_TYPE_GPU, "Intel", {
+ kDeviceTypeGPU, kDeviceVendorIntel, {
}
},
{ // Default
- CL_DEVICE_TYPE_ALL, kDefault, {
- { kDefault, { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
+ kDeviceTypeAll, kDeviceVendorAll, {
+ { kDefaultDevice, { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
}
},
}
@@ -74,25 +74,25 @@ const Database::DatabaseEntry Database::PadDouble = {
const Database::DatabaseEntry Database::PadComplexSingle = {
"Pad", Precision::kComplexSingle, {
{ // NVIDIA GPUs
- CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", {
+ kDeviceTypeGPU, kDeviceVendorNVIDIA, {
{ "GeForce GTX 480", { {"PAD_DIMX",16}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
{ "Tesla K20m", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } },
{ "Tesla K40m", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
}
},
{ // AMD GPUs
- CL_DEVICE_TYPE_GPU, "Advanced Micro Devices, Inc.", {
+ kDeviceTypeGPU, kDeviceVendorAMD, {
{ "Tahiti", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
}
},
{ // Intel GPUs
- CL_DEVICE_TYPE_GPU, "Intel", {
+ kDeviceTypeGPU, kDeviceVendorIntel, {
{ "Iris", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
}
},
{ // Default
- CL_DEVICE_TYPE_ALL, kDefault, {
- { kDefault, { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
+ kDeviceTypeAll, kDeviceVendorAll, {
+ { kDefaultDevice, { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
}
},
}
@@ -103,24 +103,24 @@ const Database::DatabaseEntry Database::PadComplexSingle = {
const Database::DatabaseEntry Database::PadComplexDouble = {
"Pad", Precision::kComplexDouble, {
{ // NVIDIA GPUs
- CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", {
+ kDeviceTypeGPU, kDeviceVendorNVIDIA, {
{ "GeForce GTX 480", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
{ "Tesla K20m", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
{ "Tesla K40m", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
}
},
{ // AMD GPUs
- CL_DEVICE_TYPE_GPU, "Advanced Micro Devices, Inc.", {
+ kDeviceTypeGPU, kDeviceVendorAMD, {
{ "Tahiti", { {"PAD_DIMX",8}, {"PAD_DIMY",16}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
}
},
{ // Intel GPUs
- CL_DEVICE_TYPE_GPU, "Intel", {
+ kDeviceTypeGPU, kDeviceVendorIntel, {
}
},
{ // Default
- CL_DEVICE_TYPE_ALL, kDefault, {
- { kDefault, { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
+ kDeviceTypeAll, kDeviceVendorAll, {
+ { kDefaultDevice, { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
}
},
}
diff --git a/include/internal/database/padtranspose.h b/include/internal/database/padtranspose.h
index 8f6fcba0..53226c1d 100644
--- a/include/internal/database/padtranspose.h
+++ b/include/internal/database/padtranspose.h
@@ -17,25 +17,25 @@ namespace clblast {
const Database::DatabaseEntry Database::PadTraSingle = {
"PadTranspose", Precision::kSingle, {
{ // NVIDIA GPUs
- CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", {
+ kDeviceTypeGPU, kDeviceVendorNVIDIA, {
{ "GeForce GTX 480", { {"PADTRA_TILE",16}, {"PADTRA_WPT",2}, {"PADTRA_PAD",1} } },
{ "Tesla K20m", { {"PADTRA_TILE",16}, {"PADTRA_WPT",2}, {"PADTRA_PAD",1} } },
{ "Tesla K40m", { {"PADTRA_TILE",32}, {"PADTRA_WPT",2}, {"PADTRA_PAD",1} } },
}
},
{ // AMD GPUs
- CL_DEVICE_TYPE_GPU, "Advanced Micro Devices, Inc.", {
+ kDeviceTypeGPU, kDeviceVendorAMD, {
{ "Tahiti", { {"PADTRA_TILE",16}, {"PADTRA_WPT",4}, {"PADTRA_PAD",0} } },
}
},
{ // Intel GPUs
- CL_DEVICE_TYPE_GPU, "Intel", {
+ kDeviceTypeGPU, kDeviceVendorIntel, {
{ "Iris", { {"PADTRA_TILE",16}, {"PADTRA_WPT",2}, {"PADTRA_PAD",0} } },
}
},
{ // Default
- CL_DEVICE_TYPE_ALL, kDefault, {
- { kDefault, { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",0} } },
+ kDeviceTypeAll, kDeviceVendorAll, {
+ { kDefaultDevice, { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",0} } },
}
},
}
@@ -46,24 +46,24 @@ const Database::DatabaseEntry Database::PadTraSingle = {
const Database::DatabaseEntry Database::PadTraDouble = {
"PadTranspose", Precision::kDouble, {
{ // NVIDIA GPUs
- CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", {
+ kDeviceTypeGPU, kDeviceVendorNVIDIA, {
{ "GeForce GTX 480", { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",1} } },
{ "Tesla K20m", { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",1} } },
{ "Tesla K40m", { {"PADTRA_TILE",16}, {"PADTRA_WPT",2}, {"PADTRA_PAD",1} } },
}
},
{ // AMD GPUs
- CL_DEVICE_TYPE_GPU, "Advanced Micro Devices, Inc.", {
+ kDeviceTypeGPU, kDeviceVendorAMD, {
{ "Tahiti", { {"PADTRA_TILE",8}, {"PADTRA_WPT",4}, {"PADTRA_PAD",0} } },
}
},
{ // Intel GPUs
- CL_DEVICE_TYPE_GPU, "Intel", {
+ kDeviceTypeGPU, kDeviceVendorIntel, {
}
},
{ // Default
- CL_DEVICE_TYPE_ALL, kDefault, {
- { kDefault, { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",0} } },
+ kDeviceTypeAll, kDeviceVendorAll, {
+ { kDefaultDevice, { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",0} } },
}
},
}
@@ -74,25 +74,25 @@ const Database::DatabaseEntry Database::PadTraDouble = {
const Database::DatabaseEntry Database::PadTraComplexSingle = {
"PadTranspose", Precision::kComplexSingle, {
{ // NVIDIA GPUs
- CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", {
+ kDeviceTypeGPU, kDeviceVendorNVIDIA, {
{ "GeForce GTX 480", { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",1} } },
{ "Tesla K20m", { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",1} } },
{ "Tesla K40m", { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",0} } },
}
},
{ // AMD GPUs
- CL_DEVICE_TYPE_GPU, "Advanced Micro Devices, Inc.", {
+ kDeviceTypeGPU, kDeviceVendorAMD, {
{ "Tahiti", { {"PADTRA_TILE",16}, {"PADTRA_WPT",2}, {"PADTRA_PAD",0} } },
}
},
{ // Intel GPUs
- CL_DEVICE_TYPE_GPU, "Intel", {
+ kDeviceTypeGPU, kDeviceVendorIntel, {
{ "Iris", { {"PADTRA_TILE",16}, {"PADTRA_WPT",2}, {"PADTRA_PAD",0} } },
}
},
{ // Default
- CL_DEVICE_TYPE_ALL, kDefault, {
- { kDefault, { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",0} } },
+ kDeviceTypeAll, kDeviceVendorAll, {
+ { kDefaultDevice, { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",0} } },
}
},
}
@@ -103,24 +103,24 @@ const Database::DatabaseEntry Database::PadTraComplexSingle = {
const Database::DatabaseEntry Database::PadTraComplexDouble = {
"PadTranspose", Precision::kComplexDouble, {
{ // NVIDIA GPUs
- CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", {
+ kDeviceTypeGPU, kDeviceVendorNVIDIA, {
{ "GeForce GTX 480", { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",1} } },
{ "Tesla K20m", { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",1} } },
{ "Tesla K40m", { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",1} } },
}
},
{ // AMD GPUs
- CL_DEVICE_TYPE_GPU, "Advanced Micro Devices, Inc.", {
+ kDeviceTypeGPU, kDeviceVendorAMD, {
{ "Tahiti", { {"PADTRA_TILE",8}, {"PADTRA_WPT",2}, {"PADTRA_PAD",1} } },
}
},
{ // Intel GPUs
- CL_DEVICE_TYPE_GPU, "Intel", {
+ kDeviceTypeGPU, kDeviceVendorIntel, {
}
},
{ // Default
- CL_DEVICE_TYPE_ALL, kDefault, {
- { kDefault, { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",0} } },
+ kDeviceTypeAll, kDeviceVendorAll, {
+ { kDefaultDevice, { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",0} } },
}
},
}
diff --git a/include/internal/database/transpose.h b/include/internal/database/transpose.h
index b348f364..1d12a13e 100644
--- a/include/internal/database/transpose.h
+++ b/include/internal/database/transpose.h
@@ -17,25 +17,25 @@ namespace clblast {
const Database::DatabaseEntry Database::TraSingle = {
"Transpose", Precision::kSingle, {
{ // NVIDIA GPUs
- CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", {
+ kDeviceTypeGPU, kDeviceVendorNVIDIA, {
{ "GeForce GTX 480", { {"TRA_DIM",16}, {"TRA_WPT",2}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0} } },
{ "Tesla K20m", { {"TRA_DIM",16}, {"TRA_WPT",2}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0} } },
{ "Tesla K40m", { {"TRA_DIM",16}, {"TRA_WPT",2}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0} } },
}
},
{ // AMD GPUs
- CL_DEVICE_TYPE_GPU, "Advanced Micro Devices, Inc.", {
+ kDeviceTypeGPU, kDeviceVendorAMD, {
{ "Tahiti", { {"TRA_DIM",16}, {"TRA_WPT",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1} } },
}
},
{ // Intel GPUs
- CL_DEVICE_TYPE_GPU, "Intel", {
+ kDeviceTypeGPU, kDeviceVendorIntel, {
{ "Iris", { {"TRA_DIM",8}, {"TRA_WPT",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0} } },
}
},
{ // Default
- CL_DEVICE_TYPE_ALL, kDefault, {
- { kDefault, { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0} } },
+ kDeviceTypeAll, kDeviceVendorAll, {
+ { kDefaultDevice, { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0} } },
}
},
}
@@ -46,24 +46,24 @@ const Database::DatabaseEntry Database::TraSingle = {
const Database::DatabaseEntry Database::TraDouble = {
"Transpose", Precision::kDouble, {
{ // NVIDIA GPUs
- CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", {
+ kDeviceTypeGPU, kDeviceVendorNVIDIA, {
{ "GeForce GTX 480", { {"TRA_DIM",8}, {"TRA_WPT",2}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0} } },
{ "Tesla K20m", { {"TRA_DIM",16}, {"TRA_WPT",2}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0} } },
{ "Tesla K40m", { {"TRA_DIM",16}, {"TRA_WPT",2}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0} } },
}
},
{ // AMD GPUs
- CL_DEVICE_TYPE_GPU, "Advanced Micro Devices, Inc.", {
+ kDeviceTypeGPU, kDeviceVendorAMD, {
{ "Tahiti", { {"TRA_DIM",16}, {"TRA_WPT",2}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1} } },
}
},
{ // Intel GPUs
- CL_DEVICE_TYPE_GPU, "Intel", {
+ kDeviceTypeGPU, kDeviceVendorIntel, {
}
},
{ // Default
- CL_DEVICE_TYPE_ALL, kDefault, {
- { kDefault, { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0} } },
+ kDeviceTypeAll, kDeviceVendorAll, {
+ { kDefaultDevice, { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0} } },
}
},
}
@@ -74,25 +74,25 @@ const Database::DatabaseEntry Database::TraDouble = {
const Database::DatabaseEntry Database::TraComplexSingle = {
"Transpose", Precision::kComplexSingle, {
{ // NVIDIA GPUs
- CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", {
+ kDeviceTypeGPU, kDeviceVendorNVIDIA, {
{ "GeForce GTX 480", { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0} } },
{ "Tesla K20m", { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0} } },
{ "Tesla K40m", { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0} } },
}
},
{ // AMD GPUs
- CL_DEVICE_TYPE_GPU, "Advanced Micro Devices, Inc.", {
+ kDeviceTypeGPU, kDeviceVendorAMD, {
{ "Tahiti", { {"TRA_DIM",16}, {"TRA_WPT",2}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1} } },
}
},
{ // Intel GPUs
- CL_DEVICE_TYPE_GPU, "Intel", {
+ kDeviceTypeGPU, kDeviceVendorIntel, {
{ "Iris", { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0} } },
}
},
{ // Default
- CL_DEVICE_TYPE_ALL, kDefault, {
- { kDefault, { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0} } },
+ kDeviceTypeAll, kDeviceVendorAll, {
+ { kDefaultDevice, { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0} } },
}
},
}
@@ -103,24 +103,24 @@ const Database::DatabaseEntry Database::TraComplexSingle = {
const Database::DatabaseEntry Database::TraComplexDouble = {
"Transpose", Precision::kComplexDouble, {
{ // NVIDIA GPUs
- CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", {
+ kDeviceTypeGPU, kDeviceVendorNVIDIA, {
{ "GeForce GTX 480", { {"TRA_DIM",8}, {"TRA_WPT",1}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0} } },
{ "Tesla K20m", { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0} } },
{ "Tesla K40m", { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0} } },
}
},
{ // AMD GPUs
- CL_DEVICE_TYPE_GPU, "Advanced Micro Devices, Inc.", {
+ kDeviceTypeGPU, kDeviceVendorAMD, {
{ "Tahiti", { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1} } },
}
},
{ // Intel GPUs
- CL_DEVICE_TYPE_GPU, "Intel", {
+ kDeviceTypeGPU, kDeviceVendorIntel, {
}
},
{ // Default
- CL_DEVICE_TYPE_ALL, kDefault, {
- { kDefault, { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0} } },
+ kDeviceTypeAll, kDeviceVendorAll, {
+ { kDefaultDevice, { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0} } },
}
},
}
diff --git a/include/internal/database/xaxpy.h b/include/internal/database/xaxpy.h
index 40747678..058e3c0a 100644
--- a/include/internal/database/xaxpy.h
+++ b/include/internal/database/xaxpy.h
@@ -17,25 +17,25 @@ namespace clblast {
const Database::DatabaseEntry Database::XaxpySingle = {
"Xaxpy", Precision::kSingle, {
{ // NVIDIA GPUs
- CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", {
+ kDeviceTypeGPU, kDeviceVendorNVIDIA, {
{ "GeForce GTX 480", { {"WGS",128}, {"WPT",1}, {"VW",2} } },
{ "Tesla K20m", { {"WGS",128}, {"WPT",2}, {"VW",2} } },
{ "Tesla K40m", { {"WGS",128}, {"WPT",1}, {"VW",4} } },
}
},
{ // AMD GPUs
- CL_DEVICE_TYPE_GPU, "Advanced Micro Devices, Inc.", {
+ kDeviceTypeGPU, kDeviceVendorAMD, {
{ "Tahiti", { {"WGS",64}, {"WPT",1}, {"VW",2} } },
}
},
{ // Intel GPUs
- CL_DEVICE_TYPE_GPU, "Intel", {
+ kDeviceTypeGPU, kDeviceVendorIntel, {
{ "Iris", { {"WGS",512}, {"WPT",1}, {"VW",1} } },
}
},
{ // Default
- CL_DEVICE_TYPE_ALL, kDefault, {
- { kDefault, { {"WGS",128}, {"WPT",1}, {"VW",1} } },
+ kDeviceTypeAll, kDeviceVendorAll, {
+ { kDefaultDevice, { {"WGS",128}, {"WPT",1}, {"VW",1} } },
}
},
}
@@ -46,24 +46,24 @@ const Database::DatabaseEntry Database::XaxpySingle = {
const Database::DatabaseEntry Database::XaxpyDouble = {
"Xaxpy", Precision::kDouble, {
{ // NVIDIA GPUs
- CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", {
+ kDeviceTypeGPU, kDeviceVendorNVIDIA, {
{ "GeForce GTX 480", { {"WGS",128}, {"WPT",1}, {"VW",1} } },
{ "Tesla K20m", { {"WGS",512}, {"WPT",1}, {"VW",2} } },
{ "Tesla K40m", { {"WGS",64}, {"WPT",1}, {"VW",2} } },
}
},
{ // AMD GPUs
- CL_DEVICE_TYPE_GPU, "Advanced Micro Devices, Inc.", {
+ kDeviceTypeGPU, kDeviceVendorAMD, {
{ "Tahiti", { {"WGS",256}, {"WPT",1}, {"VW",1} } },
}
},
{ // Intel GPUs
- CL_DEVICE_TYPE_GPU, "Intel", {
+ kDeviceTypeGPU, kDeviceVendorIntel, {
}
},
{ // Default
- CL_DEVICE_TYPE_ALL, kDefault, {
- { kDefault, { {"WGS",128}, {"WPT",1}, {"VW",1} } },
+ kDeviceTypeAll, kDeviceVendorAll, {
+ { kDefaultDevice, { {"WGS",128}, {"WPT",1}, {"VW",1} } },
}
},
}
@@ -73,25 +73,25 @@ const Database::DatabaseEntry Database::XaxpyDouble = {
const Database::DatabaseEntry Database::XaxpyComplexSingle = {
"Xaxpy", Precision::kComplexSingle, {
{ // NVIDIA GPUs
- CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", {
+ kDeviceTypeGPU, kDeviceVendorNVIDIA, {
{ "GeForce GTX 480", { {"WGS",256}, {"WPT",1}, {"VW",1} } },
{ "Tesla K20m", { {"WGS",128}, {"WPT",1}, {"VW",1} } },
{ "Tesla K40m", { {"WGS",128}, {"WPT",2}, {"VW",1} } },
}
},
{ // AMD GPUs
- CL_DEVICE_TYPE_GPU, "Advanced Micro Devices, Inc.", {
+ kDeviceTypeGPU, kDeviceVendorAMD, {
{ "Tahiti", { {"WGS",64}, {"WPT",1}, {"VW",1} } },
}
},
{ // Intel GPUs
- CL_DEVICE_TYPE_GPU, "Intel", {
+ kDeviceTypeGPU, kDeviceVendorIntel, {
{ "Iris", { {"WGS",256}, {"WPT",1}, {"VW",1} } },
}
},
{ // Default
- CL_DEVICE_TYPE_ALL, kDefault, {
- { kDefault, { {"WGS",128}, {"WPT",1}, {"VW",1} } },
+ kDeviceTypeAll, kDeviceVendorAll, {
+ { kDefaultDevice, { {"WGS",128}, {"WPT",1}, {"VW",1} } },
}
},
}
@@ -102,24 +102,24 @@ const Database::DatabaseEntry Database::XaxpyComplexSingle = {
const Database::DatabaseEntry Database::XaxpyComplexDouble = {
"Xaxpy", Precision::kComplexDouble, {
{ // NVIDIA GPUs
- CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", {
+ kDeviceTypeGPU, kDeviceVendorNVIDIA, {
{ "GeForce GTX 480", { {"WGS",128}, {"WPT",2}, {"VW",1} } },
{ "Tesla K20m", { {"WGS",256}, {"WPT",1}, {"VW",1} } },
{ "Tesla K40m", { {"WGS",64}, {"WPT",2}, {"VW",1} } },
}
},
{ // AMD GPUs
- CL_DEVICE_TYPE_GPU, "Advanced Micro Devices, Inc.", {
+ kDeviceTypeGPU, kDeviceVendorAMD, {
{ "Tahiti", { {"WGS",64}, {"WPT",1}, {"VW",1} } },
}
},
{ // Intel GPUs
- CL_DEVICE_TYPE_GPU, "Intel", {
+ kDeviceTypeGPU, kDeviceVendorIntel, {
}
},
{ // Default
- CL_DEVICE_TYPE_ALL, kDefault, {
- { kDefault, { {"WGS",128}, {"WPT",1}, {"VW",1} } },
+ kDeviceTypeAll, kDeviceVendorAll, {
+ { kDefaultDevice, { {"WGS",128}, {"WPT",1}, {"VW",1} } },
}
},
}
diff --git a/include/internal/database/xgemm.h b/include/internal/database/xgemm.h
index c2fe9bcb..49598c8c 100644
--- a/include/internal/database/xgemm.h
+++ b/include/internal/database/xgemm.h
@@ -17,26 +17,26 @@ namespace clblast {
const Database::DatabaseEntry Database::XgemmSingle = {
"Xgemm", Precision::kSingle, {
{ // NVIDIA GPUs
- CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", {
+ kDeviceTypeGPU, kDeviceVendorNVIDIA, {
{ "GeForce GTX 480", { {"MWG",128}, {"NWG",64}, {"KWG",32}, {"MDIMC",16}, {"NDIMC",16}, {"MDIMA",16}, {"NDIMB",16}, {"KWI",2}, {"VWM",2}, {"VWN",2}, {"STRM",1}, {"STRN",0}, {"SA",1}, {"SB",1} } },
{ "Tesla K20m", { {"MWG",128}, {"NWG",64}, {"KWG",16}, {"MDIMC",16}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",32}, {"KWI",2}, {"VWM",4}, {"VWN",1}, {"STRM",1}, {"STRN",0}, {"SA",1}, {"SB",1} } },
{ "Tesla K40m", { {"MWG",128}, {"NWG",128}, {"KWG",16}, {"MDIMC",16}, {"NDIMC",16}, {"MDIMA",32}, {"NDIMB",16}, {"KWI",8}, {"VWM",2}, {"VWN",1}, {"STRM",1}, {"STRN",0}, {"SA",1}, {"SB",1} } },
- { kDefault, { {"MWG",128}, {"NWG",64}, {"KWG",16}, {"MDIMC",16}, {"NDIMC",16}, {"MDIMA",16}, {"NDIMB",16}, {"KWI",2}, {"VWM",2}, {"VWN",1}, {"STRM",1}, {"STRN",0}, {"SA",1}, {"SB",1} } },
+ { kDefaultDevice, { {"MWG",128}, {"NWG",64}, {"KWG",16}, {"MDIMC",16}, {"NDIMC",16}, {"MDIMA",16}, {"NDIMB",16}, {"KWI",2}, {"VWM",2}, {"VWN",1}, {"STRM",1}, {"STRN",0}, {"SA",1}, {"SB",1} } },
}
},
{ // AMD GPUs
- CL_DEVICE_TYPE_GPU, "Advanced Micro Devices, Inc.", {
+ kDeviceTypeGPU, kDeviceVendorAMD, {
{ "Tahiti", { {"MWG",128}, {"NWG",128}, {"KWG",32}, {"MDIMC",16}, {"NDIMC",16}, {"MDIMA",32}, {"NDIMB",8}, {"KWI",2}, {"VWM",4}, {"VWN",4}, {"STRM",1}, {"STRN",1}, {"SA",1}, {"SB",1} } },
}
},
{ // Intel GPUs
- CL_DEVICE_TYPE_GPU, "Intel", {
+ kDeviceTypeGPU, kDeviceVendorIntel, {
{ "Iris", { {"MWG",64}, {"NWG",64}, {"KWG",32}, {"MDIMC",8}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",8}, {"KWI",8}, {"VWM",4}, {"VWN",4}, {"STRM",1}, {"STRN",0}, {"SA",1}, {"SB",0} } },
}
},
{ // Default
- CL_DEVICE_TYPE_ALL, kDefault, {
- { kDefault, { {"MWG",32}, {"NWG",32}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",16}, {"KWI",1}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",0}, {"SB",0} } },
+ kDeviceTypeAll, kDeviceVendorAll, {
+ { kDefaultDevice, { {"MWG",32}, {"NWG",32}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",16}, {"KWI",1}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",0}, {"SB",0} } },
}
},
}
@@ -47,25 +47,25 @@ const Database::DatabaseEntry Database::XgemmSingle = {
const Database::DatabaseEntry Database::XgemmDouble = {
"Xgemm", Precision::kDouble, {
{ // NVIDIA GPUs
- CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", {
+ kDeviceTypeGPU, kDeviceVendorNVIDIA, {
{ "GeForce GTX 480", { {"MWG",32}, {"NWG",64}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",16}, {"MDIMA",16}, {"NDIMB",32}, {"KWI",2}, {"VWM",1}, {"VWN",2}, {"STRM",1}, {"STRN",0}, {"SA",1}, {"SB",1} } },
{ "Tesla K20m", { {"MWG",64}, {"NWG",128}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",32}, {"MDIMA",32}, {"NDIMB",32}, {"KWI",8}, {"VWM",2}, {"VWN",4}, {"STRM",1}, {"STRN",1}, {"SA",1}, {"SB",1} } },
{ "Tesla K40m", { {"MWG",64}, {"NWG",64}, {"KWG",16}, {"MDIMC",16}, {"NDIMC",16}, {"MDIMA",16}, {"NDIMB",32}, {"KWI",2}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",0}, {"SA",1}, {"SB",1} } },
- { kDefault, { {"MWG",32}, {"NWG",64}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",16}, {"MDIMA",16}, {"NDIMB",32}, {"KWI",2}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",1}, {"SB",1} } },
+ { kDefaultDevice, { {"MWG",32}, {"NWG",64}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",16}, {"MDIMA",16}, {"NDIMB",32}, {"KWI",2}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",1}, {"SB",1} } },
}
},
{ // AMD GPUs
- CL_DEVICE_TYPE_GPU, "Advanced Micro Devices, Inc.", {
+ kDeviceTypeGPU, kDeviceVendorAMD, {
{ "Tahiti", { {"MWG",128}, {"NWG",64}, {"KWG",16}, {"MDIMC",32}, {"NDIMC",8}, {"MDIMA",32}, {"NDIMB",16}, {"KWI",8}, {"VWM",1}, {"VWN",2}, {"STRM",1}, {"STRN",0}, {"SA",0}, {"SB",0} } },
}
},
{ // Intel GPUs
- CL_DEVICE_TYPE_GPU, "Intel", {
+ kDeviceTypeGPU, kDeviceVendorIntel, {
}
},
{ // Default
- CL_DEVICE_TYPE_ALL, kDefault, {
- { kDefault, { {"MWG",32}, {"NWG",32}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",16}, {"KWI",1}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",0}, {"SB",0} } },
+ kDeviceTypeAll, kDeviceVendorAll, {
+ { kDefaultDevice, { {"MWG",32}, {"NWG",32}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",16}, {"KWI",1}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",0}, {"SB",0} } },
}
},
}
@@ -76,26 +76,26 @@ const Database::DatabaseEntry Database::XgemmDouble = {
const Database::DatabaseEntry Database::XgemmComplexSingle = {
"Xgemm", Precision::kComplexSingle, {
{ // NVIDIA GPUs
- CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", {
+ kDeviceTypeGPU, kDeviceVendorNVIDIA, {
{ "GeForce GTX 480", { {"MWG",32}, {"NWG",64}, {"KWG",16}, {"MDIMC",16}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",16}, {"KWI",2}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",1}, {"SB",1} } },
{ "Tesla K20m", { {"MWG",32}, {"NWG",64}, {"KWG",16}, {"MDIMC",16}, {"NDIMC",8}, {"MDIMA",8}, {"NDIMB",8}, {"KWI",8}, {"VWM",2}, {"VWN",2}, {"STRM",1}, {"STRN",0}, {"SA",1}, {"SB",0} } },
{ "Tesla K40m", { {"MWG",32}, {"NWG",64}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",32}, {"MDIMA",32}, {"NDIMB",16}, {"KWI",8}, {"VWM",1}, {"VWN",1}, {"STRM",0}, {"STRN",1}, {"SA",1}, {"SB",1} } },
- { kDefault, { {"MWG",32}, {"NWG",64}, {"KWG",16}, {"MDIMC",16}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",16}, {"KWI",2}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",1}, {"SB",1} } },
+ { kDefaultDevice, { {"MWG",32}, {"NWG",64}, {"KWG",16}, {"MDIMC",16}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",16}, {"KWI",2}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",1}, {"SB",1} } },
}
},
{ // AMD GPUs
- CL_DEVICE_TYPE_GPU, "Advanced Micro Devices, Inc.", {
+ kDeviceTypeGPU, kDeviceVendorAMD, {
{ "Tahiti", { {"MWG",16}, {"NWG",64}, {"KWG",32}, {"MDIMC",8}, {"NDIMC",8}, {"MDIMA",8}, {"NDIMB",16}, {"KWI",2}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",1}, {"SB",0} } },
}
},
{ // Intel GPUs
- CL_DEVICE_TYPE_GPU, "Intel", {
+ kDeviceTypeGPU, kDeviceVendorIntel, {
{ "Iris", { {"MWG",32}, {"NWG",32}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",16}, {"KWI",1}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",0}, {"SB",0} } },
}
},
{ // Default
- CL_DEVICE_TYPE_ALL, kDefault, {
- { kDefault, { {"MWG",32}, {"NWG",32}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",16}, {"KWI",1}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",0}, {"SB",0} } },
+ kDeviceTypeAll, kDeviceVendorAll, {
+ { kDefaultDevice, { {"MWG",32}, {"NWG",32}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",16}, {"KWI",1}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",0}, {"SB",0} } },
}
},
}
@@ -106,25 +106,25 @@ const Database::DatabaseEntry Database::XgemmComplexSingle = {
const Database::DatabaseEntry Database::XgemmComplexDouble = {
"Xgemm", Precision::kComplexDouble, {
{ // NVIDIA GPUs
- CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", {
+ kDeviceTypeGPU, kDeviceVendorNVIDIA, {
{ "GeForce GTX 480", { {"MWG",16}, {"NWG",32}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",8}, {"KWI",2}, {"VWM",1}, {"VWN",4}, {"STRM",1}, {"STRN",0}, {"SA",0}, {"SB",0} } },
{ "Tesla K20m", { {"MWG",16}, {"NWG",128}, {"KWG",32}, {"MDIMC",8}, {"NDIMC",32}, {"MDIMA",8}, {"NDIMB",32}, {"KWI",2}, {"VWM",1}, {"VWN",4}, {"STRM",1}, {"STRN",1}, {"SA",1}, {"SB",0} } },
{ "Tesla K40m", { {"MWG",32}, {"NWG",32}, {"KWG",16}, {"MDIMC",32}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",32}, {"KWI",8}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",0}, {"SB",1} } },
- { kDefault, { {"MWG",16}, {"NWG",32}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",8}, {"KWI",2}, {"VWM",1}, {"VWN",4}, {"STRM",1}, {"STRN",0}, {"SA",0}, {"SB",0} } },
+ { kDefaultDevice, { {"MWG",16}, {"NWG",32}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",8}, {"KWI",2}, {"VWM",1}, {"VWN",4}, {"STRM",1}, {"STRN",0}, {"SA",0}, {"SB",0} } },
}
},
{ // AMD GPUs
- CL_DEVICE_TYPE_GPU, "Advanced Micro Devices, Inc.", {
+ kDeviceTypeGPU, kDeviceVendorAMD, {
{ "Tahiti", { {"MWG",128}, {"NWG",32}, {"KWG",16}, {"MDIMC",32}, {"NDIMC",8}, {"MDIMA",32}, {"NDIMB",16}, {"KWI",8}, {"VWM",2}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",0}, {"SB",0} } },
}
},
{ // Intel GPUs
- CL_DEVICE_TYPE_GPU, "Intel", {
+ kDeviceTypeGPU, kDeviceVendorIntel, {
}
},
{ // Default
- CL_DEVICE_TYPE_ALL, kDefault, {
- { kDefault, { {"MWG",32}, {"NWG",32}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",16}, {"KWI",1}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",0}, {"SB",0} } },
+ kDeviceTypeAll, kDeviceVendorAll, {
+ { kDefaultDevice, { {"MWG",32}, {"NWG",32}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",16}, {"KWI",1}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",0}, {"SB",0} } },
}
},
}
diff --git a/include/internal/database/xgemv.h b/include/internal/database/xgemv.h
index 0266dd3c..c315500f 100644
--- a/include/internal/database/xgemv.h
+++ b/include/internal/database/xgemv.h
@@ -17,25 +17,25 @@ namespace clblast {
const Database::DatabaseEntry Database::XgemvSingle = {
"Xgemv", Precision::kSingle, {
{ // NVIDIA GPUs
- CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", {
+ kDeviceTypeGPU, kDeviceVendorNVIDIA, {
{ "GeForce GTX 480", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } },
{ "Tesla K20m", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } },
{ "Tesla K40m", { {"WGS1",256}, {"WPT1",1}, {"WGS2",256}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",4} } },
}
},
{ // AMD GPUs
- CL_DEVICE_TYPE_GPU, "Advanced Micro Devices, Inc.", {
+ kDeviceTypeGPU, kDeviceVendorAMD, {
{ "Tahiti", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } },
}
},
{ // Intel GPUs
- CL_DEVICE_TYPE_GPU, "Intel", {
+ kDeviceTypeGPU, kDeviceVendorIntel, {
{ "Iris", { {"WGS1",256}, {"WPT1",2}, {"WGS2",64}, {"WPT2",4}, {"VW2",4}, {"WGS3",256}, {"WPT3",2}, {"VW3",8} } },
}
},
{ // Default
- CL_DEVICE_TYPE_ALL, kDefault, {
- { kDefault, { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } },
+ kDeviceTypeAll, kDeviceVendorAll, {
+ { kDefaultDevice, { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } },
}
},
}
@@ -46,24 +46,24 @@ const Database::DatabaseEntry Database::XgemvSingle = {
const Database::DatabaseEntry Database::XgemvDouble = {
"Xgemv", Precision::kDouble, {
{ // NVIDIA GPUs
- CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", {
+ kDeviceTypeGPU, kDeviceVendorNVIDIA, {
{ "GeForce GTX 480", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } },
{ "Tesla K20m", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } },
{ "Tesla K40m", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } },
}
},
{ // AMD GPUs
- CL_DEVICE_TYPE_GPU, "Advanced Micro Devices, Inc.", {
+ kDeviceTypeGPU, kDeviceVendorAMD, {
{ "Tahiti", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } },
}
},
{ // Intel GPUs
- CL_DEVICE_TYPE_GPU, "Intel", {
+ kDeviceTypeGPU, kDeviceVendorIntel, {
}
},
{ // Default
- CL_DEVICE_TYPE_ALL, kDefault, {
- { kDefault, { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } },
+ kDeviceTypeAll, kDeviceVendorAll, {
+ { kDefaultDevice, { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } },
}
},
}
@@ -73,25 +73,25 @@ const Database::DatabaseEntry Database::XgemvDouble = {
const Database::DatabaseEntry Database::XgemvComplexSingle = {
"Xgemv", Precision::kComplexSingle, {
{ // NVIDIA GPUs
- CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", {
+ kDeviceTypeGPU, kDeviceVendorNVIDIA, {
{ "GeForce GTX 480", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } },
{ "Tesla K20m", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } },
{ "Tesla K40m", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } },
}
},
{ // AMD GPUs
- CL_DEVICE_TYPE_GPU, "Advanced Micro Devices, Inc.", {
+ kDeviceTypeGPU, kDeviceVendorAMD, {
{ "Tahiti", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } },
}
},
{ // Intel GPUs
- CL_DEVICE_TYPE_GPU, "Intel", {
+ kDeviceTypeGPU, kDeviceVendorIntel, {
{ "Iris", { {"WGS1",256}, {"WPT1",1}, {"WGS2",64}, {"WPT2",4}, {"VW2",2}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } },
}
},
{ // Default
- CL_DEVICE_TYPE_ALL, kDefault, {
- { kDefault, { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } },
+ kDeviceTypeAll, kDeviceVendorAll, {
+ { kDefaultDevice, { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } },
}
},
}
@@ -102,24 +102,24 @@ const Database::DatabaseEntry Database::XgemvComplexSingle = {
const Database::DatabaseEntry Database::XgemvComplexDouble = {
"Xgemv", Precision::kComplexDouble, {
{ // NVIDIA GPUs
- CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", {
+ kDeviceTypeGPU, kDeviceVendorNVIDIA, {
{ "GeForce GTX 480", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } },
{ "Tesla K20m", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } },
{ "Tesla K40m", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } },
}
},
{ // AMD GPUs
- CL_DEVICE_TYPE_GPU, "Advanced Micro Devices, Inc.", {
+ kDeviceTypeGPU, kDeviceVendorAMD, {
{ "Tahiti", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } },
}
},
{ // Intel GPUs
- CL_DEVICE_TYPE_GPU, "Intel", {
+ kDeviceTypeGPU, kDeviceVendorIntel, {
}
},
{ // Default
- CL_DEVICE_TYPE_ALL, kDefault, {
- { kDefault, { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } },
+ kDeviceTypeAll, kDeviceVendorAll, {
+ { kDefaultDevice, { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } },
}
},
}
diff --git a/include/internal/routine.h b/include/internal/routine.h
index 911bda49..367917fd 100644
--- a/include/internal/routine.h
+++ b/include/internal/routine.h
@@ -26,6 +26,7 @@ namespace clblast {
// =================================================================================================
// See comment at top of file for a description of the class
+template <typename T>
class Routine {
public:
@@ -52,7 +53,7 @@ class Routine {
static constexpr bool ErrorIn(const StatusCode s) { return (s != StatusCode::kSuccess); }
// Base class constructor
- explicit Routine(CommandQueue &queue, Event &event, const std::string &name,
+ explicit Routine(Queue &queue, Event &event, const std::string &name,
const std::vector<std::string> &routines, const Precision precision);
// Set-up phase of the kernel
@@ -61,31 +62,31 @@ class Routine {
protected:
// Runs a kernel given the global and local thread sizes
- StatusCode RunKernel(const Kernel &kernel, std::vector<size_t> &global,
+ StatusCode RunKernel(Kernel &kernel, std::vector<size_t> &global,
const std::vector<size_t> &local);
// Tests for valid inputs of matrices A, B, and C
- StatusCode TestMatrixA(const size_t one, const size_t two, const Buffer &buffer,
+ StatusCode TestMatrixA(const size_t one, const size_t two, const Buffer<T> &buffer,
const size_t offset, const size_t ld, const size_t data_size);
- StatusCode TestMatrixB(const size_t one, const size_t two, const Buffer &buffer,
+ StatusCode TestMatrixB(const size_t one, const size_t two, const Buffer<T> &buffer,
const size_t offset, const size_t ld, const size_t data_size);
- StatusCode TestMatrixC(const size_t one, const size_t two, const Buffer &buffer,
+ StatusCode TestMatrixC(const size_t one, const size_t two, const Buffer<T> &buffer,
const size_t offset, const size_t ld, const size_t data_size);
// Tests for valid inputs of vectors X and Y
- StatusCode TestVectorX(const size_t n, const Buffer &buffer, const size_t offset,
+ StatusCode TestVectorX(const size_t n, const Buffer<T> &buffer, const size_t offset,
const size_t inc, const size_t data_size);
- StatusCode TestVectorY(const size_t n, const Buffer &buffer, const size_t offset,
+ StatusCode TestVectorY(const size_t n, const Buffer<T> &buffer, const size_t offset,
const size_t inc, const size_t data_size);
// Copies/transposes a matrix and padds/unpads it with zeroes. This method is also able to write
// to symmetric and triangular matrices through optional arguments.
StatusCode PadCopyTransposeMatrix(const size_t src_one, const size_t src_two,
const size_t src_ld, const size_t src_offset,
- const Buffer &src,
+ const Buffer<T> &src,
const size_t dest_one, const size_t dest_two,
const size_t dest_ld, const size_t dest_offset,
- const Buffer &dest,
+ const Buffer<T> &dest,
const Program &program, const bool do_pad,
const bool do_transpose, const bool do_conjugate,
const bool upper = false, const bool lower = false,
@@ -106,14 +107,14 @@ class Routine {
std::string source_string_;
// The OpenCL objects, accessible only from derived classes
- CommandQueue queue_;
+ Queue queue_;
Event event_;
const Context context_;
const Device device_;
// OpenCL device properties
const std::string device_name_;
- const cl_uint max_work_item_dimensions_;
+ const size_t max_work_item_dimensions_;
const std::vector<size_t> max_work_item_sizes_;
const size_t max_work_group_size_;
diff --git a/include/internal/routines/level1/xaxpy.h b/include/internal/routines/level1/xaxpy.h
index e548e553..4b9da890 100644
--- a/include/internal/routines/level1/xaxpy.h
+++ b/include/internal/routines/level1/xaxpy.h
@@ -21,14 +21,26 @@ namespace clblast {
// See comment at top of file for a description of the class
template <typename T>
-class Xaxpy: public Routine {
+class Xaxpy: public Routine<T> {
public:
- Xaxpy(CommandQueue &queue, Event &event);
+
+ // Members and methods from the base class
+ using Routine<T>::db_;
+ using Routine<T>::source_string_;
+ using Routine<T>::queue_;
+ using Routine<T>::GetProgramFromCache;
+ using Routine<T>::TestVectorX;
+ using Routine<T>::TestVectorY;
+ using Routine<T>::RunKernel;
+ using Routine<T>::ErrorIn;
+
+ // Constructor
+ Xaxpy(Queue &queue, Event &event);
// Templated-precision implementation of the routine
StatusCode DoAxpy(const size_t n, const T alpha,
- const Buffer &x_buffer, const size_t x_offset, const size_t x_inc,
- const Buffer &y_buffer, const size_t y_offset, const size_t y_inc);
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc);
private:
// Static variable to get the precision
diff --git a/include/internal/routines/level2/xgemv.h b/include/internal/routines/level2/xgemv.h
index a3109036..5ada9b03 100644
--- a/include/internal/routines/level2/xgemv.h
+++ b/include/internal/routines/level2/xgemv.h
@@ -21,18 +21,31 @@ namespace clblast {
// See comment at top of file for a description of the class
template <typename T>
-class Xgemv: public Routine {
+class Xgemv: public Routine<T> {
public:
- Xgemv(CommandQueue &queue, Event &event);
+
+ // Members and methods from the base class
+ using Routine<T>::db_;
+ using Routine<T>::source_string_;
+ using Routine<T>::queue_;
+ using Routine<T>::GetProgramFromCache;
+ using Routine<T>::TestVectorX;
+ using Routine<T>::TestVectorY;
+ using Routine<T>::TestMatrixA;
+ using Routine<T>::RunKernel;
+ using Routine<T>::ErrorIn;
+
+ // Constructor
+ Xgemv(Queue &queue, Event &event);
// Templated-precision implementation of the routine
StatusCode DoGemv(const Layout layout, const Transpose a_transpose,
const size_t m, const size_t n,
const T alpha,
- const Buffer &a_buffer, const size_t a_offset, const size_t a_ld,
- const Buffer &x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
const T beta,
- const Buffer &y_buffer, const size_t y_offset, const size_t y_inc);
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc);
private:
// Static variable to get the precision
diff --git a/include/internal/routines/level3/xgemm.h b/include/internal/routines/level3/xgemm.h
index 7ad4fcfb..a0c8b595 100644
--- a/include/internal/routines/level3/xgemm.h
+++ b/include/internal/routines/level3/xgemm.h
@@ -21,18 +21,33 @@ namespace clblast {
// See comment at top of file for a description of the class
template <typename T>
-class Xgemm: public Routine {
+class Xgemm: public Routine<T> {
public:
- Xgemm(CommandQueue &queue, Event &event);
+
+ // Members and methods from the base class
+ using Routine<T>::db_;
+ using Routine<T>::source_string_;
+ using Routine<T>::queue_;
+ using Routine<T>::context_;
+ using Routine<T>::GetProgramFromCache;
+ using Routine<T>::PadCopyTransposeMatrix;
+ using Routine<T>::TestMatrixA;
+ using Routine<T>::TestMatrixB;
+ using Routine<T>::TestMatrixC;
+ using Routine<T>::RunKernel;
+ using Routine<T>::ErrorIn;
+
+ // Constructor
+ Xgemm(Queue &queue, Event &event);
// Templated-precision implementation of the routine
StatusCode DoGemm(const Layout layout, const Transpose a_transpose, const Transpose b_transpose,
const size_t m, const size_t n, const size_t k,
const T alpha,
- const Buffer &a_buffer, const size_t a_offset, const size_t a_ld,
- const Buffer &b_buffer, const size_t b_offset, const size_t b_ld,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld,
const T beta,
- const Buffer &c_buffer, const size_t c_offset, const size_t c_ld);
+ const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld);
private:
// Static variable to get the precision
diff --git a/include/internal/routines/level3/xhemm.h b/include/internal/routines/level3/xhemm.h
index 6cc9d9ec..5f1e8723 100644
--- a/include/internal/routines/level3/xhemm.h
+++ b/include/internal/routines/level3/xhemm.h
@@ -25,30 +25,28 @@ template <typename T>
class Xhemm: public Xgemm<T> {
public:
- // Uses several variables from the Routine class
- using Routine::db_;
- using Routine::context_;
-
- // Uses several helper functions from the Routine class
- using Routine::RunKernel;
- using Routine::ErrorIn;
- using Routine::TestMatrixA;
- using Routine::GetProgramFromCache;
+ // Members and methods from the base class
+ using Routine<T>::db_;
+ using Routine<T>::context_;
+ using Routine<T>::GetProgramFromCache;
+ using Routine<T>::TestMatrixA;
+ using Routine<T>::RunKernel;
+ using Routine<T>::ErrorIn;
// Uses the regular Xgemm routine
using Xgemm<T>::DoGemm;
// Constructor
- Xhemm(CommandQueue &queue, Event &event);
+ Xhemm(Queue &queue, Event &event);
// Templated-precision implementation of the routine
StatusCode DoHemm(const Layout layout, const Side side, const Triangle triangle,
const size_t m, const size_t n,
const T alpha,
- const Buffer &a_buffer, const size_t a_offset, const size_t a_ld,
- const Buffer &b_buffer, const size_t b_offset, const size_t b_ld,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld,
const T beta,
- const Buffer &c_buffer, const size_t c_offset, const size_t c_ld);
+ const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld);
};
// =================================================================================================
diff --git a/include/internal/routines/level3/xher2k.h b/include/internal/routines/level3/xher2k.h
index 1836a812..9e961d23 100644
--- a/include/internal/routines/level3/xher2k.h
+++ b/include/internal/routines/level3/xher2k.h
@@ -23,18 +23,33 @@ namespace clblast {
// See comment at top of file for a description of the class
template <typename T, typename U>
-class Xher2k: public Routine {
+class Xher2k: public Routine<T> {
public:
- Xher2k(CommandQueue &queue, Event &event);
+
+ // Members and methods from the base class
+ using Routine<T>::db_;
+ using Routine<T>::source_string_;
+ using Routine<T>::queue_;
+ using Routine<T>::context_;
+ using Routine<T>::GetProgramFromCache;
+ using Routine<T>::PadCopyTransposeMatrix;
+ using Routine<T>::TestMatrixA;
+ using Routine<T>::TestMatrixB;
+ using Routine<T>::TestMatrixC;
+ using Routine<T>::RunKernel;
+ using Routine<T>::ErrorIn;
+
+ // Constructor
+ Xher2k(Queue &queue, Event &event);
// Templated-precision implementation of the routine
StatusCode DoHer2k(const Layout layout, const Triangle triangle, const Transpose ab_transpose,
const size_t n, const size_t k,
const T alpha,
- const Buffer &a_buffer, const size_t a_offset, const size_t a_ld,
- const Buffer &b_buffer, const size_t b_offset, const size_t b_ld,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld,
const U beta,
- const Buffer &c_buffer, const size_t c_offset, const size_t c_ld);
+ const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld);
private:
// Static variable to get the precision
diff --git a/include/internal/routines/level3/xherk.h b/include/internal/routines/level3/xherk.h
index 9b361254..f285a71c 100644
--- a/include/internal/routines/level3/xherk.h
+++ b/include/internal/routines/level3/xherk.h
@@ -23,17 +23,31 @@ namespace clblast {
// See comment at top of file for a description of the class
template <typename T, typename U>
-class Xherk: public Routine {
+class Xherk: public Routine<T> {
public:
- Xherk(CommandQueue &queue, Event &event);
+
+ // Members and methods from the base class
+ using Routine<T>::db_;
+ using Routine<T>::source_string_;
+ using Routine<T>::queue_;
+ using Routine<T>::context_;
+ using Routine<T>::GetProgramFromCache;
+ using Routine<T>::PadCopyTransposeMatrix;
+ using Routine<T>::TestMatrixA;
+ using Routine<T>::TestMatrixC;
+ using Routine<T>::RunKernel;
+ using Routine<T>::ErrorIn;
+
+ // Constructor
+ Xherk(Queue &queue, Event &event);
// Templated-precision implementation of the routine
StatusCode DoHerk(const Layout layout, const Triangle triangle, const Transpose a_transpose,
const size_t n, const size_t k,
const U alpha,
- const Buffer &a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
const U beta,
- const Buffer &c_buffer, const size_t c_offset, const size_t c_ld);
+ const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld);
private:
// Static variable to get the precision
diff --git a/include/internal/routines/level3/xsymm.h b/include/internal/routines/level3/xsymm.h
index 2028ceea..9ed3c722 100644
--- a/include/internal/routines/level3/xsymm.h
+++ b/include/internal/routines/level3/xsymm.h
@@ -27,30 +27,28 @@ template <typename T>
class Xsymm: public Xgemm<T> {
public:
- // Uses several variables from the Routine class
- using Routine::db_;
- using Routine::context_;
-
- // Uses several helper functions from the Routine class
- using Routine::RunKernel;
- using Routine::ErrorIn;
- using Routine::TestMatrixA;
- using Routine::GetProgramFromCache;
+ // Members and methods from the base class
+ using Routine<T>::db_;
+ using Routine<T>::context_;
+ using Routine<T>::GetProgramFromCache;
+ using Routine<T>::TestMatrixA;
+ using Routine<T>::RunKernel;
+ using Routine<T>::ErrorIn;
// Uses the regular Xgemm routine
using Xgemm<T>::DoGemm;
// Constructor
- Xsymm(CommandQueue &queue, Event &event);
+ Xsymm(Queue &queue, Event &event);
// Templated-precision implementation of the routine
StatusCode DoSymm(const Layout layout, const Side side, const Triangle triangle,
const size_t m, const size_t n,
const T alpha,
- const Buffer &a_buffer, const size_t a_offset, const size_t a_ld,
- const Buffer &b_buffer, const size_t b_offset, const size_t b_ld,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld,
const T beta,
- const Buffer &c_buffer, const size_t c_offset, const size_t c_ld);
+ const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld);
};
// =================================================================================================
diff --git a/include/internal/routines/level3/xsyr2k.h b/include/internal/routines/level3/xsyr2k.h
index 6259313c..85936658 100644
--- a/include/internal/routines/level3/xsyr2k.h
+++ b/include/internal/routines/level3/xsyr2k.h
@@ -23,18 +23,33 @@ namespace clblast {
// See comment at top of file for a description of the class
template <typename T>
-class Xsyr2k: public Routine {
+class Xsyr2k: public Routine<T> {
public:
- Xsyr2k(CommandQueue &queue, Event &event);
+
+ // Members and methods from the base class
+ using Routine<T>::db_;
+ using Routine<T>::source_string_;
+ using Routine<T>::queue_;
+ using Routine<T>::context_;
+ using Routine<T>::GetProgramFromCache;
+ using Routine<T>::PadCopyTransposeMatrix;
+ using Routine<T>::TestMatrixA;
+ using Routine<T>::TestMatrixB;
+ using Routine<T>::TestMatrixC;
+ using Routine<T>::RunKernel;
+ using Routine<T>::ErrorIn;
+
+ // Constructor
+ Xsyr2k(Queue &queue, Event &event);
// Templated-precision implementation of the routine
StatusCode DoSyr2k(const Layout layout, const Triangle triangle, const Transpose ab_transpose,
const size_t n, const size_t k,
const T alpha,
- const Buffer &a_buffer, const size_t a_offset, const size_t a_ld,
- const Buffer &b_buffer, const size_t b_offset, const size_t b_ld,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld,
const T beta,
- const Buffer &c_buffer, const size_t c_offset, const size_t c_ld);
+ const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld);
private:
// Static variable to get the precision
diff --git a/include/internal/routines/level3/xsyrk.h b/include/internal/routines/level3/xsyrk.h
index 3dab731f..14d51a58 100644
--- a/include/internal/routines/level3/xsyrk.h
+++ b/include/internal/routines/level3/xsyrk.h
@@ -25,17 +25,31 @@ namespace clblast {
// See comment at top of file for a description of the class
template <typename T>
-class Xsyrk: public Routine {
+class Xsyrk: public Routine<T> {
public:
- Xsyrk(CommandQueue &queue, Event &event);
+
+ // Members and methods from the base class
+ using Routine<T>::db_;
+ using Routine<T>::source_string_;
+ using Routine<T>::queue_;
+ using Routine<T>::context_;
+ using Routine<T>::GetProgramFromCache;
+ using Routine<T>::PadCopyTransposeMatrix;
+ using Routine<T>::TestMatrixA;
+ using Routine<T>::TestMatrixC;
+ using Routine<T>::RunKernel;
+ using Routine<T>::ErrorIn;
+
+ // Constructor
+ Xsyrk(Queue &queue, Event &event);
// Templated-precision implementation of the routine
StatusCode DoSyrk(const Layout layout, const Triangle triangle, const Transpose a_transpose,
const size_t n, const size_t k,
const T alpha,
- const Buffer &a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
const T beta,
- const Buffer &c_buffer, const size_t c_offset, const size_t c_ld);
+ const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld);
private:
// Static variable to get the precision
diff --git a/include/internal/routines/level3/xtrmm.h b/include/internal/routines/level3/xtrmm.h
index 4f49bebd..d8ac60fd 100644
--- a/include/internal/routines/level3/xtrmm.h
+++ b/include/internal/routines/level3/xtrmm.h
@@ -26,29 +26,27 @@ template <typename T>
class Xtrmm: public Xgemm<T> {
public:
- // Uses several variables from the Routine class
- using Routine::db_;
- using Routine::context_;
-
- // Uses several helper functions from the Routine class
- using Routine::RunKernel;
- using Routine::ErrorIn;
- using Routine::TestMatrixA;
- using Routine::GetProgramFromCache;
+ // Members and methods from the base class
+ using Routine<T>::db_;
+ using Routine<T>::context_;
+ using Routine<T>::GetProgramFromCache;
+ using Routine<T>::TestMatrixA;
+ using Routine<T>::RunKernel;
+ using Routine<T>::ErrorIn;
// Uses the regular Xgemm routine
using Xgemm<T>::DoGemm;
// Constructor
- Xtrmm(CommandQueue &queue, Event &event);
+ Xtrmm(Queue &queue, Event &event);
// Templated-precision implementation of the routine
StatusCode DoTrmm(const Layout layout, const Side side, const Triangle triangle,
const Transpose a_transpose, const Diagonal diagonal,
const size_t m, const size_t n,
const T alpha,
- const Buffer &a_buffer, const size_t a_offset, const size_t a_ld,
- const Buffer &b_buffer, const size_t b_offset, const size_t b_ld);
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld);
};
// =================================================================================================
diff --git a/include/internal/utilities.h b/include/internal/utilities.h
index 60d70eae..6dba24e1 100644
--- a/include/internal/utilities.h
+++ b/include/internal/utilities.h
@@ -131,12 +131,13 @@ struct Arguments {
};
// Structure containing all possible buffers for test clients
+template <typename T>
struct Buffers {
- Buffer x_vec;
- Buffer y_vec;
- Buffer a_mat;
- Buffer b_mat;
- Buffer c_mat;
+ Buffer<T> x_vec;
+ Buffer<T> y_vec;
+ Buffer<T> a_mat;
+ Buffer<T> b_mat;
+ Buffer<T> c_mat;
};
// =================================================================================================