From 591e343ec94077f873b1aa12052a4ce55ae80200 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sun, 15 May 2016 20:18:34 +0200 Subject: Added an example of using the half-precision HAXPY routine --- CMakeLists.txt | 2 +- samples/haxpy.c | 105 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 106 insertions(+), 1 deletion(-) create mode 100644 samples/haxpy.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 38588289..051e7643 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -120,7 +120,7 @@ include_directories(${clblast_SOURCE_DIR}/include ${OPENCL_INCLUDE_DIRS}) # Sets the supported routines and the used kernels. New routines and kernels should be added here. set(KERNELS copy pad transpose padtranspose xaxpy xdot xger xgemm xgemv) set(SAMPLE_PROGRAMS_CPP sgemm) -set(SAMPLE_PROGRAMS_C sasum dgemv sgemm cache) +set(SAMPLE_PROGRAMS_C sasum dgemv sgemm haxpy cache) set(LEVEL1_ROUTINES xswap xscal xcopy xaxpy xdot xdotu xdotc xnrm2 xasum xamax) set(LEVEL2_ROUTINES xgemv xgbmv xhemv xhbmv xhpmv xsymv xsbmv xspmv xtrmv xtbmv xtpmv xger xgeru xgerc xher xhpr xher2 xhpr2 xsyr xspr xsyr2 xspr2) diff --git a/samples/haxpy.c b/samples/haxpy.c new file mode 100644 index 00000000..3c7bb33a --- /dev/null +++ b/samples/haxpy.c @@ -0,0 +1,105 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file demonstrates the use of the HAXPY routine. It demonstrates the use of half-precision. +// +// Note that this example is meant for illustration purposes only. CLBlast provides other programs +// for performance benchmarking ('client_xxxxx') and for correctness testing ('test_xxxxx'). +// +// ================================================================================================= + +#include +#include +#include + +// Includes the CLBlast library (C interface) +#include + +// Includes the float-to-half and half-to-float conversion utilities +#include + +// ================================================================================================= + +// Example use of the half-precision routine HAXPY +int main(void) { + + // OpenCL platform/device settings + const size_t platform_id = 0; + const size_t device_id = 0; + + // Example HAXPY arguments + const size_t n = 8192; + const cl_half alpha = FloatToHalf(0.5f); + + // Initializes the OpenCL platform + cl_uint num_platforms; + clGetPlatformIDs(0, NULL, &num_platforms); + cl_platform_id* platforms = (cl_platform_id*)malloc(num_platforms*sizeof(cl_platform_id)); + clGetPlatformIDs(num_platforms, platforms, NULL); + cl_platform_id platform = platforms[platform_id]; + + // Initializes the OpenCL device + cl_uint num_devices; + clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices); + cl_device_id* devices = (cl_device_id*)malloc(num_devices*sizeof(cl_device_id)); + clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, num_devices, devices, NULL); + cl_device_id device = devices[device_id]; + + // Creates the OpenCL context, queue, and an event + cl_context context = clCreateContext(NULL, 1, &device, NULL, NULL, NULL); + cl_command_queue queue = clCreateCommandQueue(context, device, 0, NULL); + cl_event event = NULL; + + // Populate host vectors with some example data + cl_half* host_a = (cl_half*)malloc(sizeof(cl_half)*n); + cl_half* host_b = (cl_half*)malloc(sizeof(cl_half)*n); + for (size_t i=0; i success). + printf("Completed HAXPY with status %d\n", status); + + // Prints the first output value + if (status == 0) { + printf("Output value at index 0: b[0] = %.3lf\n", HalfToFloat(host_b[0])); + } + + // Clean-up + free(platforms); + free(devices); + free(host_a); + free(host_b); + clReleaseMemObject(device_a); + clReleaseMemObject(device_b); + clReleaseCommandQueue(queue); + clReleaseContext(context); + return 0; +} + +// ================================================================================================= -- cgit v1.2.3