From 2952390f27c07500bd2a24b5e6fdce5e282fc8dd Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Fri, 29 Apr 2016 23:33:36 +0200 Subject: Added an example to demonstrate the use of the ClearCache and FillCache functions --- samples/cache.c | 133 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 133 insertions(+) create mode 100644 samples/cache.c (limited to 'samples/cache.c') diff --git a/samples/cache.c b/samples/cache.c new file mode 100644 index 00000000..7f876be1 --- /dev/null +++ b/samples/cache.c @@ -0,0 +1,133 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file demonstrates the CLBlast kernel cache, which stores compiled OpenCL binaries for faster +// repeated kernel execution. The cache can be pre-initialized or cleared. +// +// Note that this example is meant for illustration purposes only. CLBlast provides other programs +// for performance benchmarking ('client_xxxxx') and for correctness testing ('test_xxxxx'). +// +// ================================================================================================= + +#include +#include +#include +#include + +// Includes the CLBlast library (C interface) +#include + +// Forward declaration +void run_example_routine(const cl_device_id device); + +// ================================================================================================= + +// Example use of the CLBlast kernel cache +int main(void) { + + // OpenCL platform/device settings + const size_t platform_id = 0; + const size_t device_id = 0; + + // Initializes the OpenCL platform + cl_uint num_platforms; + clGetPlatformIDs(0, NULL, &num_platforms); + cl_platform_id* platforms = (cl_platform_id*)malloc(num_platforms*sizeof(cl_platform_id)); + clGetPlatformIDs(num_platforms, platforms, NULL); + cl_platform_id platform = platforms[platform_id]; + + // Initializes the OpenCL device + cl_uint num_devices; + clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices); + cl_device_id* devices = (cl_device_id*)malloc(num_devices*sizeof(cl_device_id)); + clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, num_devices, devices, NULL); + cl_device_id device = devices[device_id]; + + // Run the routine multiple times in a row: after the first time the binary is already in the + // cache and compilation is no longer needed. + printf("Starting caching sample with an empty cache\n"); + run_example_routine(device); + run_example_routine(device); + run_example_routine(device); + + // Clearing the cache makes CLBlast re-compile the kernel once + printf("Clearing cache\n"); + CLBlastClearCache(); + run_example_routine(device); + run_example_routine(device); + + // When the cache is empty, it can be pre-initialized with compiled kernels for all routines by + // calling the CLBlastFillCache function, such that all other CLBlast calls can benefit from + // pre-compiled kernels and thus execute at maximum speed. + printf("Clearing cache\n"); + CLBlastClearCache(); + printf("Filling cache (this might take a while)\n"); + CLBlastFillCache(device); + run_example_routine(device); + + // Clean-up + free(platforms); + free(devices); + return 0; +} + +// ================================================================================================= + +// Runs an example routine and reports the time +void run_example_routine(const cl_device_id device) { + + // Example SASUM arguments + const size_t n = 1024*128; + + // Creates the OpenCL context, queue, and an event + cl_context context = clCreateContext(NULL, 1, &device, NULL, NULL, NULL); + cl_command_queue queue = clCreateCommandQueue(context, device, 0, NULL); + cl_event event = NULL; + + // Populate host data structures with some example data + float* host_input = (float*)malloc(sizeof(float)*n); + float* host_output = (float*)malloc(sizeof(float)*1); + for (size_t i=0; i success). + printf("Completed routine with status %d in %.3lf ms\n", status, time_ms); + + // Clean-up + free(host_input); + free(host_output); + clReleaseMemObject(device_input); + clReleaseMemObject(device_output); + clReleaseCommandQueue(queue); + clReleaseContext(context); +} + +// ================================================================================================= -- cgit v1.2.3