summaryrefslogtreecommitdiff
path: root/src/clpp11.hpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/clpp11.hpp')
-rw-r--r--src/clpp11.hpp120
1 files changed, 76 insertions, 44 deletions
diff --git a/src/clpp11.hpp b/src/clpp11.hpp
index b834d8b4..d57223dd 100644
--- a/src/clpp11.hpp
+++ b/src/clpp11.hpp
@@ -72,15 +72,24 @@ inline void CheckError(const cl_int status) {
class Event {
public:
- // Constructor based on the regular OpenCL data-type
- explicit Event(const cl_event event): event_(event) { }
+ // Constructor based on the regular OpenCL data-type: memory management is handled elsewhere
+ explicit Event(const cl_event event):
+ event_(new cl_event) {
+ *event_ = event;
+ }
- // Regular constructor
- explicit Event(): event_(nullptr) { }
+ // Regular constructor with memory management
+ explicit Event():
+ event_(new cl_event, [](cl_event* e) {
+ if (*e) { CheckError(clReleaseEvent(*e)); }
+ delete e;
+ }) {
+ *event_ = nullptr;
+ }
// Waits for completion of this event
void WaitForCompletion() const {
- CheckError(clWaitForEvents(1, &event_));
+ CheckError(clWaitForEvents(1, &(*event_)));
}
// Retrieves the elapsed time of the last recorded event. Note that no error checking is done on
@@ -89,20 +98,22 @@ class Event {
float GetElapsedTime() const {
WaitForCompletion();
auto bytes = size_t{0};
- clGetEventProfilingInfo(event_, CL_PROFILING_COMMAND_START, 0, nullptr, &bytes);
+ clGetEventProfilingInfo(*event_, CL_PROFILING_COMMAND_START, 0, nullptr, &bytes);
auto time_start = size_t{0};
- clGetEventProfilingInfo(event_, CL_PROFILING_COMMAND_START, bytes, &time_start, nullptr);
- clGetEventProfilingInfo(event_, CL_PROFILING_COMMAND_END, 0, nullptr, &bytes);
+ clGetEventProfilingInfo(*event_, CL_PROFILING_COMMAND_START, bytes, &time_start, nullptr);
+ clGetEventProfilingInfo(*event_, CL_PROFILING_COMMAND_END, 0, nullptr, &bytes);
auto time_end = size_t{0};
- clGetEventProfilingInfo(event_, CL_PROFILING_COMMAND_END, bytes, &time_end, nullptr);
+ clGetEventProfilingInfo(*event_, CL_PROFILING_COMMAND_END, bytes, &time_end, nullptr);
return (time_end - time_start) * 1.0e-6f;
}
// Accessor to the private data-member
- cl_event& operator()() { return event_; }
- cl_event* pointer() { return &event_; }
+ cl_event& operator()() { return *event_; }
+ const cl_event& operator()() const { return *event_; }
+ cl_event* pointer() { return &(*event_); }
+ const cl_event* pointer() const { return &(*event_); }
private:
- cl_event event_;
+ std::shared_ptr<cl_event> event_;
};
// Pointer to an OpenCL event
@@ -163,6 +174,15 @@ class Device {
// Methods to retrieve device information
std::string Version() const { return GetInfoString(CL_DEVICE_VERSION); }
+ size_t VersionNumber() const
+ {
+ std::string version_string = Version().substr(7);
+ // Space separates the end of the OpenCL version number from the beginning of the
+ // vendor-specific information.
+ size_t next_whitespace = version_string.find(' ');
+ size_t version = (size_t) (100.0 * std::stod(version_string.substr(0, next_whitespace)));
+ return version;
+ }
std::string Vendor() const { return GetInfoString(CL_DEVICE_VENDOR); }
std::string Name() const { return GetInfoString(CL_DEVICE_NAME); }
std::string Type() const {
@@ -176,24 +196,32 @@ class Device {
}
size_t MaxWorkGroupSize() const { return GetInfo<size_t>(CL_DEVICE_MAX_WORK_GROUP_SIZE); }
size_t MaxWorkItemDimensions() const {
- return GetInfo(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS);
+ return static_cast<size_t>(GetInfo<cl_uint>(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS));
}
std::vector<size_t> MaxWorkItemSizes() const {
return GetInfoVector<size_t>(CL_DEVICE_MAX_WORK_ITEM_SIZES);
}
- size_t LocalMemSize() const {
- return static_cast<size_t>(GetInfo<cl_ulong>(CL_DEVICE_LOCAL_MEM_SIZE));
+ cl_ulong LocalMemSize() const {
+ return GetInfo<cl_ulong>(CL_DEVICE_LOCAL_MEM_SIZE);
}
std::string Capabilities() const { return GetInfoString(CL_DEVICE_EXTENSIONS); }
- size_t CoreClock() const { return GetInfo(CL_DEVICE_MAX_CLOCK_FREQUENCY); }
- size_t ComputeUnits() const { return GetInfo(CL_DEVICE_MAX_COMPUTE_UNITS); }
- size_t MemorySize() const { return GetInfo(CL_DEVICE_GLOBAL_MEM_SIZE); }
- size_t MaxAllocSize() const { return GetInfo(CL_DEVICE_MAX_MEM_ALLOC_SIZE); }
+ size_t CoreClock() const {
+ return static_cast<size_t>(GetInfo<cl_uint>(CL_DEVICE_MAX_CLOCK_FREQUENCY));
+ }
+ size_t ComputeUnits() const {
+ return static_cast<size_t>(GetInfo<cl_uint>(CL_DEVICE_MAX_COMPUTE_UNITS));
+ }
+ unsigned long MemorySize() const {
+ return static_cast<unsigned long>(GetInfo<cl_ulong>(CL_DEVICE_GLOBAL_MEM_SIZE));
+ }
+ unsigned long MaxAllocSize() const {
+ return static_cast<unsigned long>(GetInfo<cl_ulong>(CL_DEVICE_MAX_MEM_ALLOC_SIZE));
+ }
size_t MemoryClock() const { return 0; } // Not exposed in OpenCL
size_t MemoryBusWidth() const { return 0; } // Not exposed in OpenCL
// Configuration-validity checks
- bool IsLocalMemoryValid(const size_t local_mem_usage) const {
+ bool IsLocalMemoryValid(const cl_ulong local_mem_usage) const {
return (local_mem_usage <= LocalMemSize());
}
bool IsThreadConfigValid(const std::vector<size_t> &local) const {
@@ -211,6 +239,8 @@ class Device {
bool IsCPU() const { return Type() == "CPU"; }
bool IsGPU() const { return Type() == "GPU"; }
bool IsAMD() const { return Vendor() == "AMD" || Vendor() == "Advanced Micro Devices, Inc."; }
+ bool IsNVIDIA() const { return Vendor() == "NVIDIA" || Vendor() == "NVIDIA Corporation"; }
+ bool IsIntel() const { return Vendor() == "Intel" || Vendor() == "GenuineIntel"; }
bool IsARM() const { return Vendor() == "ARM"; }
// Accessor to the private data-member
@@ -227,13 +257,6 @@ class Device {
CheckError(clGetDeviceInfo(device_, info, bytes, &result, nullptr));
return result;
}
- size_t GetInfo(const cl_device_info info) const {
- auto bytes = size_t{0};
- CheckError(clGetDeviceInfo(device_, info, 0, nullptr, &bytes));
- auto result = cl_uint(0);
- CheckError(clGetDeviceInfo(device_, info, bytes, &result, nullptr));
- return static_cast<size_t>(result);
- }
template <typename T>
std::vector<T> GetInfoVector(const cl_device_info info) const {
auto bytes = size_t{0};
@@ -386,8 +409,16 @@ class Queue {
delete s; }) {
auto status = CL_SUCCESS;
#ifdef CL_VERSION_2_0
- cl_queue_properties properties[] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0};
- *queue_ = clCreateCommandQueueWithProperties(context(), device(), properties, &status);
+ size_t ocl_version = device.VersionNumber();
+ if (ocl_version >= 200)
+ {
+ cl_queue_properties properties[] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0};
+ *queue_ = clCreateCommandQueueWithProperties(context(), device(), properties, &status);
+ }
+ else
+ {
+ *queue_ = clCreateCommandQueue(context(), device(), CL_QUEUE_PROFILING_ENABLE, &status);
+ }
#else
*queue_ = clCreateCommandQueue(context(), device(), CL_QUEUE_PROFILING_ENABLE, &status);
#endif
@@ -627,15 +658,25 @@ class Kernel {
}
// Retrieves the amount of local memory used per work-group for this kernel
- size_t LocalMemUsage(const Device &device) const {
+ cl_ulong LocalMemUsage(const Device &device) const {
auto bytes = size_t{0};
auto query = cl_kernel_work_group_info{CL_KERNEL_LOCAL_MEM_SIZE};
CheckError(clGetKernelWorkGroupInfo(*kernel_, device(), query, 0, nullptr, &bytes));
- auto result = size_t{0};
+ auto result = cl_ulong{0};
CheckError(clGetKernelWorkGroupInfo(*kernel_, device(), query, bytes, &result, nullptr));
return result;
}
+ // Retrieves the name of the kernel
+ std::string GetFunctionName() {
+ auto bytes = size_t{0};
+ CheckError(clGetKernelInfo(*kernel_, CL_KERNEL_FUNCTION_NAME, 0, nullptr, &bytes));
+ auto result = std::string{};
+ result.resize(bytes);
+ CheckError(clGetKernelInfo(*kernel_, CL_KERNEL_FUNCTION_NAME, bytes, &result[0], nullptr));
+ return std::string{result.c_str()}; // Removes any trailing '\0'-characters
+ }
+
// Launches a kernel onto the specified queue
void Launch(const Queue &queue, const std::vector<size_t> &global,
const std::vector<size_t> &local, EventPointer event) {
@@ -647,30 +688,21 @@ class Kernel {
// As above, but with an event waiting list
void Launch(const Queue &queue, const std::vector<size_t> &global,
const std::vector<size_t> &local, EventPointer event,
- std::vector<Event>& waitForEvents) {
- if (waitForEvents.size() == 0) { return Launch(queue, global, local, event); }
-
+ const std::vector<Event> &waitForEvents) {
// Builds a plain version of the events waiting list
auto waitForEventsPlain = std::vector<cl_event>();
for (auto &waitEvent : waitForEvents) {
- waitForEventsPlain.push_back(waitEvent());
+ if (waitEvent()) { waitForEventsPlain.push_back(waitEvent()); }
}
// Launches the kernel while waiting for other events
CheckError(clEnqueueNDRangeKernel(queue(), *kernel_, static_cast<cl_uint>(global.size()),
- nullptr, global.data(), local.data(),
+ nullptr, global.data(), !local.empty() ? local.data() : nullptr,
static_cast<cl_uint>(waitForEventsPlain.size()),
- waitForEventsPlain.data(),
+ !waitForEventsPlain.empty() ? waitForEventsPlain.data() : nullptr,
event));
}
- // As above, but with the default local workgroup size
- void Launch(const Queue &queue, const std::vector<size_t> &global, EventPointer event) {
- CheckError(clEnqueueNDRangeKernel(queue(), *kernel_, static_cast<cl_uint>(global.size()),
- nullptr, global.data(), nullptr,
- 0, nullptr, event));
- }
-
// Accessor to the private data-member
const cl_kernel& operator()() const { return *kernel_; }
private: