summaryrefslogtreecommitdiff
path: root/include/internal/clpp11.h
blob: 73040fdb2c198c98ce9c32dcb3264189628e14d3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
// width of 100 characters per line.
//
// Author(s):
//   Cedric Nugteren <www.cedricnugteren.nl>
//
// This file implements a C++11 wrapper around some OpenCL C data-types, similar to Khronos' cl.hpp.
// The main differences are modern C++11 support and a straightforward implemenation of the basic
// needs (as required for this project). It also includes some extra functionality not available
// in cl.hpp, such as including the sources with a Program object and querying a Kernel's validity
// in terms of local memory usage.
//
// This file is adapted from the C++ bindings from the CLTune project and therefore contains the
// following copyright notice:
//
// =================================================================================================
//
// Copyright 2014 SURFsara
// 
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// 
//  http://www.apache.org/licenses/LICENSE-2.0
// 
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// =================================================================================================

#ifndef CLBLAST_CLPP11_H_
#define CLBLAST_CLPP11_H_

#include <utility> // std::swap
#include <algorithm> // std::copy
#include <string> // std::string
#include <vector> // std::vector
#include <stdexcept> // std::runtime_error

// Includes the normal OpenCL C header
#if defined(__APPLE__) || defined(__MACOSX)
  #include <OpenCL/opencl.h>
#else
  #include <CL/opencl.h>
#endif

namespace clblast {
// =================================================================================================

// Base class for any object
class Object {
 protected:

  // Error handling (NOTE: these functions are [[noreturn]])
  void Error(const std::string &message) const {
    throw std::runtime_error("Internal OpenCL error: "+message);
  }
  void Error(const cl_int status) const {
    throw std::runtime_error("Internal OpenCL error with status: "+std::to_string(status));
  }
};

// =================================================================================================

// Base class for objects which require memory management
class ObjectWithState: public Object {

};

// =================================================================================================

// C++11 version of cl_event
class Event: public Object {
 public:

  // Constructor based on the plain C data-type
  explicit Event(const cl_event event): event_(event) { }

  // New event
  Event(): event_() {}

  // Public functions
  size_t GetProfilingStart() const {
    auto bytes = size_t{0};
    clGetEventProfilingInfo(event_, CL_PROFILING_COMMAND_START, 0, nullptr, &bytes);
    auto result = size_t{0};
    clGetEventProfilingInfo(event_, CL_PROFILING_COMMAND_START, bytes, &result, nullptr);
    return result;
  }
  size_t GetProfilingEnd() const {
    auto bytes = size_t{0};
    clGetEventProfilingInfo(event_, CL_PROFILING_COMMAND_END, 0, nullptr, &bytes);
    auto result = size_t{0};
    clGetEventProfilingInfo(event_, CL_PROFILING_COMMAND_END, bytes, &result, nullptr);
    return result;
  }
  cl_int Wait() const {
    return clWaitForEvents(1, &event_);
  }

  // Accessors to the private data-member
  cl_event operator()() const { return event_; }
  cl_event& operator()() { return event_; }
 private:
  cl_event event_;
};

// =================================================================================================

// C++11 version of cl_platform_id
class Platform: public Object {
 public:

  // Constructor based on the plain C data-type
  explicit Platform(const cl_platform_id platform): platform_(platform) { }

  // Initialize the platform. Note that this constructor can throw exceptions!
  explicit Platform(const size_t platform_id) {
    auto num_platforms = cl_uint{0};
    auto status = clGetPlatformIDs(0, nullptr, &num_platforms);
    if (status != CL_SUCCESS) { Error(status); }
    if (num_platforms == 0) { Error("no platforms found"); }
    auto platforms = std::vector<cl_platform_id>(num_platforms);
    status = clGetPlatformIDs(num_platforms, platforms.data(), nullptr);
    if (status != CL_SUCCESS) { Error(status); }
    if (platform_id >= num_platforms) { Error("invalid platform ID "+std::to_string(platform_id)); }
    platform_ = platforms[platform_id];
  }

  // Accessors to the private data-member
  cl_platform_id operator()() const { return platform_; }
  cl_platform_id& operator()() { return platform_; }
 private:
  cl_platform_id platform_;
};

// =================================================================================================

// C++11 version of cl_device_id
class Device: public Object {
 public:

  // Constructor based on the plain C data-type
  explicit Device(const cl_device_id device): device_(device) { }

  // Initialize the device. Note that this constructor can throw exceptions!
  explicit Device(const Platform &platform, const cl_device_type type, const size_t device_id) {
    auto num_devices = cl_uint{0};
    auto status = clGetDeviceIDs(platform(), type, 0, nullptr, &num_devices);
    if (status != CL_SUCCESS) { Error(status); }
    if (num_devices == 0) { Error("no devices found"); }
    auto devices = std::vector<cl_device_id>(num_devices);
    status = clGetDeviceIDs(platform(), type, num_devices, devices.data(), nullptr);
    if (status != CL_SUCCESS) { Error(status); }
    if (device_id >= num_devices) { Error("invalid device ID "+std::to_string(device_id)); }
    device_ = devices[device_id];
  }

  // Public functions
  std::string Version()     const { return GetInfoString(CL_DEVICE_VERSION); }
  cl_device_type Type()     const { return GetInfo<cl_device_type>(CL_DEVICE_TYPE); }
  std::string Vendor()      const { return GetInfoString(CL_DEVICE_VENDOR); }
  std::string Name()        const { return GetInfoString(CL_DEVICE_NAME); }
  std::string Extensions()  const { return GetInfoString(CL_DEVICE_EXTENSIONS); }
  size_t MaxWorkGroupSize() const { return GetInfo<size_t>(CL_DEVICE_MAX_WORK_GROUP_SIZE); }
  cl_ulong LocalMemSize()   const { return GetInfo<cl_ulong>(CL_DEVICE_LOCAL_MEM_SIZE); }
  cl_uint MaxWorkItemDimensions() const {
    return GetInfo<cl_uint>(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS);
  }
  std::vector<size_t> MaxWorkItemSizes() const {
    return GetInfoVector<size_t>(CL_DEVICE_MAX_WORK_ITEM_SIZES);
  }

  // Configuration-validity checks
  bool IsLocalMemoryValid(const size_t local_mem_usage) const {
    return (local_mem_usage <= LocalMemSize());
  }
  bool IsThreadConfigValid(const std::vector<size_t> &local) const {
    auto local_size = size_t{1};
    for (auto &item: local) { local_size *= item; }
    for (auto i=size_t{0}; i<local.size(); ++i) {
      if (local[i] > MaxWorkItemSizes()[i]) { return false; }
    }
    if (local_size > MaxWorkGroupSize()) { return false; }
    if (local.size() > MaxWorkItemDimensions()) { return false; }
    return true;
  }

  // Accessors to the private data-member
  cl_device_id operator()() const { return device_; }
  cl_device_id& operator()() { return device_; }
 private:

  // Helper functions
  template <typename T>
  T GetInfo(const cl_device_info info) const {
    auto bytes = size_t{0};
    clGetDeviceInfo(device_, info, 0, nullptr, &bytes);
    auto result = T(0);
    clGetDeviceInfo(device_, info, bytes, &result, nullptr);
    return result;
  }
  template <typename T>
  std::vector<T> GetInfoVector(const cl_device_info info) const {
    auto bytes = size_t{0};
    clGetDeviceInfo(device_, info, 0, nullptr, &bytes);
    auto result = std::vector<T>(bytes/sizeof(T));
    clGetDeviceInfo(device_, info, bytes, result.data(), nullptr);
    return result;
  }
  std::string GetInfoString(const cl_device_info info) const {
    auto bytes = size_t{0};
    clGetDeviceInfo(device_, info, 0, nullptr, &bytes);
    auto result = std::vector<char>(bytes);
    clGetDeviceInfo(device_, info, bytes, result.data(), nullptr);
    return std::string(result.data());
  }

  cl_device_id device_;
};

// =================================================================================================

// C++11 version of cl_context
class Context: public ObjectWithState {
 public:

  // Constructor based on the plain C data-type
  explicit Context(const cl_context context): context_(context) {
    clRetainContext(context_);
  }

  // Memory management
  explicit Context(const Device &device) {
    auto status = CL_SUCCESS;
    const cl_device_id dev = device();
    context_ = clCreateContext(nullptr, 1, &dev, nullptr, nullptr, &status);
    if (status != CL_SUCCESS) { Error(status); }
  }
  ~Context() {
    clReleaseContext(context_);
  }
  Context(const Context &other):
    context_(other.context_) {
    clRetainContext(context_);
  }
  Context& operator=(Context other) {
    swap(*this, other);
    return *this;
  }
  friend void swap(Context &first, Context &second) {
    std::swap(first.context_, second.context_);
  }

  // Accessors to the private data-member
  cl_context operator()() const { return context_; }
  cl_context& operator()() { return context_; }
 private:
  cl_context context_;
};

// =================================================================================================

// C++11 version of cl_program. Additionally holds the program's source code.
class Program: public ObjectWithState {
 public:

  // Note that there is no constructor based on the plain C data-type because of extra state

  // Memory management
  explicit Program(const Context &context, const std::string &source):
    length_(source.length()) {
      std::copy(source.begin(), source.end(), back_inserter(source_));
      source_ptr_ = source_.data();
      auto status = CL_SUCCESS;
      program_ = clCreateProgramWithSource(context(), 1, &source_ptr_, &length_, &status);
      if (status != CL_SUCCESS) { Error(status); }
    }
  ~Program() {
    clReleaseProgram(program_);
  }
  Program(const Program &other):
      length_(other.length_),
      source_(other.source_),
      source_ptr_(other.source_ptr_),
      program_(other.program_) {
    clRetainProgram(program_);
  }
  Program& operator=(Program other) {
    swap(*this, other);
    return *this;
  }
  /*
  TODO: Implement move construction/assignment?
  Program(Program &&other) {
    clRetainProgram(program_);
    swap(*this, other);
  }
  Program& operator=(Program &&other) {
    swap(*this, other);
    return *this;
  }*/
  friend void swap(Program &first, Program &second) {
    std::swap(first.length_, second.length_);
    std::swap(first.source_, second.source_);
    std::swap(first.source_ptr_, second.source_ptr_);
    std::swap(first.program_, second.program_);
  }

  // Public functions
  cl_int Build(const Device &device, const std::string &options) {
    const cl_device_id dev = device();
    return clBuildProgram(program_, 1, &dev, options.c_str(), nullptr, nullptr);
  }
  std::string GetBuildInfo(const Device &device) const {
    auto bytes = size_t{0};
    clGetProgramBuildInfo(program_, device(), CL_PROGRAM_BUILD_LOG, 0, nullptr, &bytes);
    auto result = std::vector<char>(bytes);
    clGetProgramBuildInfo(program_, device(), CL_PROGRAM_BUILD_LOG, bytes, result.data(), nullptr);
    return std::string(result.data());
  }

  // Accessors to the private data-member
  cl_program operator()() const { return program_; }
  cl_program& operator()() { return program_; }
 private:
  size_t length_;
  std::vector<char> source_;
  const char* source_ptr_;
  cl_program program_;
};

// =================================================================================================

// C++11 version of cl_kernel
class Kernel: public ObjectWithState {
 public:

  // Constructor based on the plain C data-type
  explicit Kernel(const cl_kernel kernel): kernel_(kernel) {
    clRetainKernel(kernel_);
  }

  // Memory management
  explicit Kernel(const Program &program, const std::string &name) {
    auto status = CL_SUCCESS;
    kernel_ = clCreateKernel(program(), name.c_str(), &status);
    if (status != CL_SUCCESS) { Error(status); }
  }
  ~Kernel() {
    clReleaseKernel(kernel_);
  }
  Kernel(const Kernel &other):
    kernel_(other.kernel_) {
    clRetainKernel(kernel_);
  }
  Kernel& operator=(Kernel other) {
    swap(*this, other);
    return *this;
  }
  friend void swap(Kernel &first, Kernel &second) {
    std::swap(first.kernel_, second.kernel_);
  }

  // Public functions
  template <typename T> // Note: doesn't work with T=Buffer
  cl_int SetArgument(const cl_uint index, const T &value) {
    return clSetKernelArg(kernel_, index, sizeof(T), &value);
  }
  size_t LocalMemUsage(const Device &device) const {
    auto bytes = size_t{0};
    clGetKernelWorkGroupInfo(kernel_, device(), CL_KERNEL_LOCAL_MEM_SIZE, 0, nullptr, &bytes);
    auto result = size_t{0};
    clGetKernelWorkGroupInfo(kernel_, device(), CL_KERNEL_LOCAL_MEM_SIZE, bytes, &result, nullptr);
    return result;
  }

  // Accessors to the private data-member
  cl_kernel operator()() const { return kernel_; }
  cl_kernel& operator()() { return kernel_; }
 private:
  cl_kernel kernel_;
};

// =================================================================================================

// C++11 version of cl_command_queue
class CommandQueue: public ObjectWithState {
 public:

  // Constructor based on the plain C data-type
  explicit CommandQueue(const cl_command_queue queue): queue_(queue) {
    clRetainCommandQueue(queue_);
  }

  // Memory management
  explicit CommandQueue(const Context &context, const Device &device) {
    auto status = CL_SUCCESS;
    queue_ = clCreateCommandQueue(context(), device(), CL_QUEUE_PROFILING_ENABLE, &status);
    if (status != CL_SUCCESS) { Error(status); }
  }
  ~CommandQueue() {
    clReleaseCommandQueue(queue_);
  }
  CommandQueue(const CommandQueue &other):
    queue_(other.queue_) {
    clRetainCommandQueue(queue_);
  }
  CommandQueue& operator=(CommandQueue other) {
    swap(*this, other);
    return *this;
  }
  friend void swap(CommandQueue &first, CommandQueue &second) {
    std::swap(first.queue_, second.queue_);
  }

  // Public functions
  cl_int EnqueueKernel(const Kernel &kernel, const std::vector<size_t> &global,
                       const std::vector<size_t> &local, Event &event) {
    return clEnqueueNDRangeKernel(queue_, kernel(), static_cast<cl_uint>(global.size()), nullptr,
                                  global.data(), local.data(), 0, nullptr, &(event()));
  }
  Context GetContext() const {
    auto bytes = size_t{0};
    clGetCommandQueueInfo(queue_, CL_QUEUE_CONTEXT, 0, nullptr, &bytes);
    cl_context result;
    clGetCommandQueueInfo(queue_, CL_QUEUE_CONTEXT, bytes, &result, nullptr);
    return Context(result);
  }
  Device GetDevice() const {
    auto bytes = size_t{0};
    clGetCommandQueueInfo(queue_, CL_QUEUE_DEVICE, 0, nullptr, &bytes);
    cl_device_id result;
    clGetCommandQueueInfo(queue_, CL_QUEUE_DEVICE, bytes, &result, nullptr);
    return Device(result);
  }
  cl_int Finish() {
    return clFinish(queue_);
  }

  // Accessors to the private data-member
  cl_command_queue operator()() const { return queue_; }
  cl_command_queue& operator()() { return queue_; }
 private:
  cl_command_queue queue_;
};

// =================================================================================================

// C++11 version of cl_mem
class Buffer: public ObjectWithState {
 public:

  // Constructor based on the plain C data-type
  explicit Buffer(const cl_mem buffer): buffer_(buffer) {
    clRetainMemObject(buffer_);
  }

  // Memory management
  explicit Buffer(const Context &context, const cl_mem_flags flags, const size_t bytes) {
    auto status = CL_SUCCESS;
    buffer_ = clCreateBuffer(context(), flags, bytes, nullptr, &status);
    if (status != CL_SUCCESS) { Error(status); }
  }
  ~Buffer() {
    clReleaseMemObject(buffer_);
  }
  Buffer(const Buffer &other):
    buffer_(other.buffer_) {
    clRetainMemObject(buffer_);
  }
  Buffer& operator=(Buffer other) {
    swap(*this, other);
    return *this;
  }
  friend void swap(Buffer &first, Buffer &second) {
    std::swap(first.buffer_, second.buffer_);
  }

  // Public functions
  template <typename T>
  cl_int ReadBuffer(const CommandQueue &queue, const size_t bytes, T* host) {
    return clEnqueueReadBuffer(queue(), buffer_, CL_TRUE, 0, bytes, host, 0, nullptr, nullptr);
  }
  template <typename T>
  cl_int ReadBuffer(const CommandQueue &queue, const size_t bytes, std::vector<T> &host) {
    return ReadBuffer(queue, bytes, host.data());
  }
  template <typename T>
  cl_int WriteBuffer(const CommandQueue &queue, const size_t bytes, const T* host) {
    return clEnqueueWriteBuffer(queue(), buffer_, CL_TRUE, 0, bytes, host, 0, nullptr, nullptr);
  }
  template <typename T>
  cl_int WriteBuffer(const CommandQueue &queue, const size_t bytes, const std::vector<T> &host) {
    return WriteBuffer(queue, bytes, &host[0]);
  }
  size_t GetSize() const {
    auto bytes = size_t{0};
    auto status = clGetMemObjectInfo(buffer_, CL_MEM_SIZE, 0, nullptr, &bytes);
    if (status != CL_SUCCESS) { Error(status); }
    auto result = size_t{0};
    status = clGetMemObjectInfo(buffer_, CL_MEM_SIZE, bytes, &result, nullptr);
    if (status != CL_SUCCESS) { Error(status); }
    return result;
  }

  // Accessors to the private data-member
  cl_mem operator()() const { return buffer_; }
  cl_mem& operator()() { return buffer_; }
 private:
  cl_mem buffer_;
};

// =================================================================================================
} // namespace clblast

// CLBLAST_CLPP11_H_
#endif