diff options
-rw-r--r-- | src/cupp11.hpp | 12 | ||||
-rw-r--r-- | src/utilities/utilities.cpp | 16 |
2 files changed, 18 insertions, 10 deletions
diff --git a/src/cupp11.hpp b/src/cupp11.hpp index 854c0be9..5b18d4cf 100644 --- a/src/cupp11.hpp +++ b/src/cupp11.hpp @@ -41,6 +41,7 @@ #include <string> // std::string #include <vector> // std::vector #include <memory> // std::shared_ptr +#include <cstring> // std::strlen // CUDA #include <cuda.h> // CUDA driver API @@ -251,6 +252,7 @@ public: auto result = std::string{}; result.resize(kStringLength); CheckError(cuDeviceGetName(&result[0], result.size(), device_)); + result.resize(strlen(result.c_str())); // Removes any trailing '\0'-characters return result; } std::string Type() const { return "GPU"; } @@ -657,12 +659,13 @@ public: // Constructor based on the regular CUDA data-type: memory management is handled elsewhere explicit Kernel(const CUmodule module, const CUfunction kernel): + name_("unknown"), module_(module), kernel_(kernel) { } // Regular constructor with memory management - explicit Kernel(const Program &program, const std::string &name) { + explicit Kernel(const Program &program, const std::string &name): name_(name) { CheckError(cuModuleLoadDataEx(&module_, program.GetIR().data(), 0, nullptr, nullptr)); CheckError(cuModuleGetFunction(&kernel_, module_, name.c_str())); } @@ -701,7 +704,7 @@ public: // Retrieves the name of the kernel std::string GetFunctionName() const { - return std::string{"unknown"}; // Not implemented for the CUDA backend + return name_; } // Launches a kernel onto the specified queue @@ -722,10 +725,10 @@ public: } // Launches the kernel, its execution time is recorded by events - CheckError(cuEventRecord(event->start(), queue())); + if (event) { CheckError(cuEventRecord(event->start(), queue())); } CheckError(cuLaunchKernel(kernel_, grid[0], grid[1], grid[2], block[0], block[1], block[2], 0, queue(), pointers.data(), nullptr)); - CheckError(cuEventRecord(event->end(), queue())); + if (event) { CheckError(cuEventRecord(event->end(), queue())); } } // As above, but with an event waiting list @@ -748,6 +751,7 @@ public: const CUfunction& operator()() const { return kernel_; } CUfunction operator()() { return kernel_; } private: + const std::string name_; CUmodule module_; CUfunction kernel_; std::vector<size_t> arguments_indices_; // Indices of the arguments diff --git a/src/utilities/utilities.cpp b/src/utilities/utilities.cpp index a5c1d45e..f2574104 100644 --- a/src/utilities/utilities.cpp +++ b/src/utilities/utilities.cpp @@ -413,13 +413,17 @@ std::string GetDeviceVendor(const Device& device) { // Mid-level info std::string GetDeviceArchitecture(const Device& device) { auto device_architecture = std::string{""}; - if (device.HasExtension(kKhronosAttributesNVIDIA)) { + #ifdef CUDA_API device_architecture = device.NVIDIAComputeCapability(); - } - else if (device.HasExtension(kKhronosAttributesAMD)) { - device_architecture = device.Name(); // Name is architecture for AMD APP and AMD ROCm - } - // Note: no else - 'device_architecture' might be the empty string + #else + if (device.HasExtension(kKhronosAttributesNVIDIA)) { + device_architecture = device.NVIDIAComputeCapability(); + } + else if (device.HasExtension(kKhronosAttributesAMD)) { + device_architecture = device.Name(); // Name is architecture for AMD APP and AMD ROCm + } + // Note: no else - 'device_architecture' might be the empty string + #endif for (auto &find_and_replace : device_mapping::kArchitectureNames) { // replacing to common names if (device_architecture == find_and_replace.first) { device_architecture = find_and_replace.second; } |