diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2018-03-06 20:52:12 +0100 |
---|---|---|
committer | Cedric Nugteren <web@cedricnugteren.nl> | 2018-03-06 20:52:12 +0100 |
commit | 0e1a1520233934e1d11bf2c32fc909617ff751ac (patch) | |
tree | 6cf9505552515a604ca06b7dff99dd980a3a33ef /samples | |
parent | a1cedf36e357f0ce19eba67e1e031c3fd2647fae (diff) |
First version of the tuning API, added interface for copy-kernel, added sample
Diffstat (limited to 'samples')
-rw-r--r-- | samples/tuning_api.cpp | 77 |
1 files changed, 77 insertions, 0 deletions
diff --git a/samples/tuning_api.cpp b/samples/tuning_api.cpp new file mode 100644 index 00000000..3c9ba876 --- /dev/null +++ b/samples/tuning_api.cpp @@ -0,0 +1,77 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file demonstrates the use of the runtime tuning API. It is a stand-alone example, but it +// does require the Khronos C++ OpenCL API header file (downloaded by CMake). +// +// ================================================================================================= + +#include <cstdio> +#include <chrono> +#include <vector> + +#define CL_USE_DEPRECATED_OPENCL_1_1_APIS // to disable deprecation warnings +#define CL_USE_DEPRECATED_OPENCL_1_2_APIS // to disable deprecation warnings + +// Includes the C++ OpenCL API. If not yet available, it can be found here: +// https://www.khronos.org/registry/cl/api/1.1/cl.hpp +#include "cl.hpp" + +// Includes the CLBlast library +#include <clblast.h> + +// ================================================================================================= + +int main() { + + // OpenCL platform/device settings + const auto platform_id = 0; + const auto device_id = 0; + + // Example arguments + const size_t m = 128; + const size_t n = 64; + const auto fraction = 1.0; // between 0.0 and 1.0 + + // Initializes the OpenCL platform + auto platforms = std::vector<cl::Platform>(); + cl::Platform::get(&platforms); + if (platforms.size() == 0 || platform_id >= platforms.size()) { return 1; } + auto platform = platforms[platform_id]; + + // Initializes the OpenCL device + auto devices = std::vector<cl::Device>(); + platform.getDevices(CL_DEVICE_TYPE_ALL, &devices); + if (devices.size() == 0 || device_id >= devices.size()) { return 1; } + auto device = devices[device_id]; + + // Creates the OpenCL context, queue, and an event + auto device_as_vector = std::vector<cl::Device>{device}; + auto context = cl::Context(device_as_vector); + auto queue = cl::CommandQueue(context, device); + + // Performs the tuning + printf("Starting the tuning...\n"); + std::unordered_map<std::string,size_t> parameters; + auto queue_plain = queue(); + auto status = clblast::TuneCopyMatrixFast<float>(&queue_plain, m, n, fraction, parameters); + + // Tuning completed. See "clblast.h" for status codes (0 -> success). + printf("Completed TuneCopyMatrixFast with status %d (0 == OK), found parameters:\n", static_cast<int>(status)); + for (const auto parameter: parameters) { + printf("> %s = %zu\n", parameter.first.c_str(), parameter.second); + } + + // Set the new parameters + status = clblast::OverrideParameters(device(), "Copy", clblast::Precision::kSingle, parameters); + printf("Completed OverrideParameters with status %d (0 == OK)\n", static_cast<int>(status)); + return 0; +} + +// ================================================================================================= |