diff options
-rw-r--r-- | CMakeLists.txt | 14 | ||||
-rw-r--r-- | samples/sgemm.c | 122 |
2 files changed, 133 insertions, 3 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9cf89569..8b598bf8 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -103,7 +103,8 @@ include_directories(${clblast_SOURCE_DIR}/include ${OPENCL_INCLUDE_DIRS})
 
 # Sets the supported routines and the used kernels. New routines and kernels should be added here.
 set(KERNELS copy pad transpose padtranspose xaxpy xgemv xgemm)
-set(SAMPLE_PROGRAMS sgemm)
+set(SAMPLE_PROGRAMS_CPP sgemm)
+set(SAMPLE_PROGRAMS_C sgemm)
 set(LEVEL1_ROUTINES xaxpy)
 set(LEVEL2_ROUTINES xgemv xhemv xsymv)
 set(LEVEL3_ROUTINES xgemm xsymm xhemm xsyrk xherk xsyr2k xher2k xtrmm)
@@ -137,13 +138,20 @@ install(FILES include/clblast_c.h DESTINATION include)
 # This section contains all the code related to the examples
 if(SAMPLES)
 
-  # Adds sample programs
-  foreach(SAMPLE ${SAMPLE_PROGRAMS})
+  # Adds sample programs (C++)
+  foreach(SAMPLE ${SAMPLE_PROGRAMS_CPP})
     add_executable(sample_${SAMPLE} samples/${SAMPLE}.cc)
     target_link_libraries(sample_${SAMPLE} clblast ${OPENCL_LIBRARIES})
     install(TARGETS sample_${SAMPLE} DESTINATION bin)
   endforeach()
 
+  # Adds sample programs (C)
+  foreach(SAMPLE ${SAMPLE_PROGRAMS_C})
+    add_executable(sample_${SAMPLE}_c samples/${SAMPLE}.c)
+    target_link_libraries(sample_${SAMPLE}_c clblast ${OPENCL_LIBRARIES})
+    install(TARGETS sample_${SAMPLE}_c DESTINATION bin)
+  endforeach()
+
 endif()
 
 # ==================================================================================================
diff --git a/samples/sgemm.c b/samples/sgemm.c
new file mode 100644
index 00000000..f43fb147
--- /dev/null
+++ b/samples/sgemm.c
@@ -0,0 +1,122 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+//   Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file demonstrates the use of the SGEMM routine. It is pure C99 and demonstrates the use of
+// the C API to the CLBlast library.
+//
+// Note that this example is meant for illustration purposes only. CLBlast provides other programs
+// for performance benchmarking ('client_xxxxx') and for correctness testing ('test_xxxxx').
+//
+// =================================================================================================
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+// Includes the CLBlast library (C interface)
+#include <clblast_c.h>
+
+// =================================================================================================
+
+// Example use of the single-precision routine SGEMM
+int main(void) {
+
+  // OpenCL platform/device settings
+  const size_t platform_id = 0;
+  const size_t device_id = 0;
+
+  // Example SGEMM arguments
+  const size_t m = 128;
+  const size_t n = 64;
+  const size_t k = 512;
+  const float alpha = 0.7f;
+  const float beta = 1.0f;
+  const size_t a_ld = k;
+  const size_t b_ld = n;
+  const size_t c_ld = n;
+
+  // Initializes the OpenCL platform (stops early if the requested platform index does not exist)
+  cl_uint num_platforms = 0;
+  clGetPlatformIDs(0, NULL, &num_platforms);
+  if (platform_id >= num_platforms) {
+    fprintf(stderr, "No OpenCL platform with index %d found\n", (int)platform_id);
+    return 1;
+  }
+  cl_platform_id* platforms = (cl_platform_id*)malloc(num_platforms*sizeof(cl_platform_id));
+  clGetPlatformIDs(num_platforms, platforms, NULL);
+  cl_platform_id platform = platforms[platform_id];
+
+  // Initializes the OpenCL device (note: example for GPU devices only)
+  cl_uint num_devices = 0;
+  clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &num_devices);
+  if (device_id >= num_devices) {
+    fprintf(stderr, "No OpenCL GPU device with index %d found\n", (int)device_id);
+    free(platforms);
+    return 1;
+  }
+  cl_device_id* devices = (cl_device_id*)malloc(num_devices*sizeof(cl_device_id));
+  clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, num_devices, devices, NULL);
+  cl_device_id device = devices[device_id];
+
+  // Creates the OpenCL context, queue, and an event
+  cl_context context = clCreateContext(NULL, 1, &device, NULL, NULL, NULL);
+  cl_command_queue queue = clCreateCommandQueue(context, device, 0, NULL);
+  cl_event event = NULL;
+
+  // Populate host matrices with some example data
+  float* host_a = (float*)malloc(sizeof(float)*m*k);
+  float* host_b = (float*)malloc(sizeof(float)*n*k);
+  float* host_c = (float*)malloc(sizeof(float)*m*n);
+  for (size_t i=0; i<m*k; ++i) { host_a[i] = 12.193f; }
+  for (size_t i=0; i<n*k; ++i) { host_b[i] = -8.199f; }
+  for (size_t i=0; i<m*n; ++i) { host_c[i] = 0.0f; }
+
+  // Copy the matrices to the device
+  cl_mem device_a = clCreateBuffer(context, CL_MEM_READ_WRITE, m*k*sizeof(float), NULL, NULL);
+  cl_mem device_b = clCreateBuffer(context, CL_MEM_READ_WRITE, n*k*sizeof(float), NULL, NULL);
+  cl_mem device_c = clCreateBuffer(context, CL_MEM_READ_WRITE, m*n*sizeof(float), NULL, NULL);
+  clEnqueueWriteBuffer(queue, device_a, CL_TRUE, 0, m*k*sizeof(float), host_a, 0, NULL, NULL);
+  clEnqueueWriteBuffer(queue, device_b, CL_TRUE, 0, n*k*sizeof(float), host_b, 0, NULL, NULL);
+  clEnqueueWriteBuffer(queue, device_c, CL_TRUE, 0, m*n*sizeof(float), host_c, 0, NULL, NULL);
+
+  // Call the SGEMM routine.
+  StatusCode status = CLBlastSgemm(kRowMajor, kNo, kNo,
+                                   m, n, k,
+                                   alpha,
+                                   device_a, 0, a_ld,
+                                   device_b, 0, b_ld,
+                                   beta,
+                                   device_c, 0, c_ld,
+                                   &queue, &event);
+
+  // Wait for completion, but only when the call succeeded (status 0): on failure 'event' may
+  // still be NULL, which must not be waited on. The event is released afterwards to avoid a leak.
+  if (status == 0) {
+    clWaitForEvents(1, &event);
+    clReleaseEvent(event);
+  }
+
+  // Example completed. See "clblast_c.h" for status codes (0 -> success).
+  printf("Completed with status %d\n", status);
+
+  // Clean-up
+  free(platforms);
+  free(devices);
+  free(host_a);
+  free(host_b);
+  free(host_c);
+  clReleaseMemObject(device_a);
+  clReleaseMemObject(device_b);
+  clReleaseMemObject(device_c);
+  clReleaseCommandQueue(queue);
+  clReleaseContext(context);
+  return 0;
+}
+
+// =================================================================================================