summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Cedric Nugteren <web@cedricnugteren.nl> 2017-10-15 12:17:35 +0200
committer Cedric Nugteren <web@cedricnugteren.nl> 2017-10-15 12:17:35 +0200
commit 7408da174c848ffeaa1fe2da52f26a057e65b0f1 (patch)
tree 27045f3a79905450829845dd3725f7152282f505
parent 55a802c63d79264bf6e5e9d82a1df34bbe85ee64 (diff)
Various fixes to make the first CUDA examples work
-rw-r--r-- src/cupp11.hpp 12
-rw-r--r-- src/utilities/utilities.cpp 16
2 files changed, 18 insertions, 10 deletions
diff --git a/src/cupp11.hpp b/src/cupp11.hpp
index 854c0be9..5b18d4cf 100644
--- a/src/cupp11.hpp
+++ b/src/cupp11.hpp
@@ -41,6 +41,7 @@
#include <string> // std::string
#include <vector> // std::vector
#include <memory> // std::shared_ptr
+#include <cstring> // std::strlen
// CUDA
#include <cuda.h> // CUDA driver API
@@ -251,6 +252,7 @@ public:
auto result = std::string{};
result.resize(kStringLength);
CheckError(cuDeviceGetName(&result[0], result.size(), device_));
+ result.resize(strlen(result.c_str())); // Removes any trailing '\0'-characters
return result;
}
std::string Type() const { return "GPU"; }
@@ -657,12 +659,13 @@ public:
// Constructor based on the regular CUDA data-type: memory management is handled elsewhere
explicit Kernel(const CUmodule module, const CUfunction kernel):
+ name_("unknown"),
module_(module),
kernel_(kernel) {
}
// Regular constructor with memory management
- explicit Kernel(const Program &program, const std::string &name) {
+ explicit Kernel(const Program &program, const std::string &name): name_(name) {
CheckError(cuModuleLoadDataEx(&module_, program.GetIR().data(), 0, nullptr, nullptr));
CheckError(cuModuleGetFunction(&kernel_, module_, name.c_str()));
}
@@ -701,7 +704,7 @@ public:
// Retrieves the name of the kernel
std::string GetFunctionName() const {
- return std::string{"unknown"}; // Not implemented for the CUDA backend
+ return name_;
}
// Launches a kernel onto the specified queue
@@ -722,10 +725,10 @@ public:
}
// Launches the kernel, its execution time is recorded by events
- CheckError(cuEventRecord(event->start(), queue()));
+ if (event) { CheckError(cuEventRecord(event->start(), queue())); }
CheckError(cuLaunchKernel(kernel_, grid[0], grid[1], grid[2], block[0], block[1], block[2],
0, queue(), pointers.data(), nullptr));
- CheckError(cuEventRecord(event->end(), queue()));
+ if (event) { CheckError(cuEventRecord(event->end(), queue())); }
}
// As above, but with an event waiting list
@@ -748,6 +751,7 @@ public:
const CUfunction& operator()() const { return kernel_; }
CUfunction operator()() { return kernel_; }
private:
+ const std::string name_;
CUmodule module_;
CUfunction kernel_;
std::vector<size_t> arguments_indices_; // Indices of the arguments
diff --git a/src/utilities/utilities.cpp b/src/utilities/utilities.cpp
index a5c1d45e..f2574104 100644
--- a/src/utilities/utilities.cpp
+++ b/src/utilities/utilities.cpp
@@ -413,13 +413,17 @@ std::string GetDeviceVendor(const Device& device) {
// Mid-level info
std::string GetDeviceArchitecture(const Device& device) {
auto device_architecture = std::string{""};
- if (device.HasExtension(kKhronosAttributesNVIDIA)) {
+ #ifdef CUDA_API
device_architecture = device.NVIDIAComputeCapability();
- }
- else if (device.HasExtension(kKhronosAttributesAMD)) {
- device_architecture = device.Name(); // Name is architecture for AMD APP and AMD ROCm
- }
- // Note: no else - 'device_architecture' might be the empty string
+ #else
+ if (device.HasExtension(kKhronosAttributesNVIDIA)) {
+ device_architecture = device.NVIDIAComputeCapability();
+ }
+ else if (device.HasExtension(kKhronosAttributesAMD)) {
+ device_architecture = device.Name(); // Name is architecture for AMD APP and AMD ROCm
+ }
+ // Note: no else - 'device_architecture' might be the empty string
+ #endif
for (auto &find_and_replace : device_mapping::kArchitectureNames) { // replacing to common names
if (device_architecture == find_and_replace.first) { device_architecture = find_and_replace.second; }