summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- CMakeLists.txt 14
-rw-r--r-- samples/sgemm.c 108
2 files changed, 119 insertions, 3 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9cf89569..8b598bf8 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -103,7 +103,8 @@ include_directories(${clblast_SOURCE_DIR}/include ${OPENCL_INCLUDE_DIRS})
# Sets the supported routines, the used kernels, and the sample programs. New routines, kernels,
# and samples should be added here. Samples are listed once per API flavour: the C++ list is built
# from samples/<name>.cc and the C list from samples/<name>.c.
set(KERNELS copy pad transpose padtranspose xaxpy xgemv xgemm)
-set(SAMPLE_PROGRAMS sgemm)
+set(SAMPLE_PROGRAMS_CPP sgemm)
+set(SAMPLE_PROGRAMS_C sgemm)
set(LEVEL1_ROUTINES xaxpy)
set(LEVEL2_ROUTINES xgemv xhemv xsymv)
set(LEVEL3_ROUTINES xgemm xsymm xhemm xsyrk xherk xsyr2k xher2k xtrmm)
@@ -137,13 +138,20 @@ install(FILES include/clblast_c.h DESTINATION include)
# This section contains all the code related to the examples. It is only processed when the
# SAMPLES variable evaluates to true.
if(SAMPLES)
- # Adds sample programs
- foreach(SAMPLE ${SAMPLE_PROGRAMS})
+ # Adds sample programs (C++): one 'sample_<name>' executable per entry, installed into bin
+ foreach(SAMPLE ${SAMPLE_PROGRAMS_CPP})
add_executable(sample_${SAMPLE} samples/${SAMPLE}.cc)
target_link_libraries(sample_${SAMPLE} clblast ${OPENCL_LIBRARIES})
install(TARGETS sample_${SAMPLE} DESTINATION bin)
endforeach()
+ # Adds sample programs (C): the '_c' suffix keeps the target distinct from a same-named C++ sample
+ foreach(SAMPLE ${SAMPLE_PROGRAMS_C})
+ add_executable(sample_${SAMPLE}_c samples/${SAMPLE}.c)
+ target_link_libraries(sample_${SAMPLE}_c clblast ${OPENCL_LIBRARIES})
+ install(TARGETS sample_${SAMPLE}_c DESTINATION bin)
+ endforeach()
+
endif()
# ==================================================================================================
diff --git a/samples/sgemm.c b/samples/sgemm.c
new file mode 100644
index 00000000..f43fb147
--- /dev/null
+++ b/samples/sgemm.c
@@ -0,0 +1,108 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file demonstrates the use of the SGEMM routine. It is pure C99 and demonstrates the use of
+// the C API to the CLBlast library.
+//
+// Note that this example is meant for illustration purposes only. CLBlast provides other programs
+// for performance benchmarking ('client_xxxxx') and for correctness testing ('test_xxxxx').
+//
+// =================================================================================================
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+// Includes the CLBlast library (C interface)
+#include <clblast_c.h>
+
+// =================================================================================================
+
+// Example use of the single-precision routine SGEMM through the CLBlast C API. Sets up OpenCL on
+// the selected platform and GPU device, uploads constant-filled matrices, runs the routine once
+// and prints the returned status code. Returns 0 once the routine has been called (whatever its
+// status) and 1 if the requested platform or device is not available.
+int main(void) {
+
+  // OpenCL platform/device settings (indices into the lists returned by the OpenCL runtime)
+  const size_t platform_id = 0;
+  const size_t device_id = 0;
+
+  // Example SGEMM arguments. With the row-major layout requested below, A is m x k, B is k x n
+  // and C is m x n, so each leading dimension equals the row length of its matrix.
+  const size_t m = 128;
+  const size_t n = 64;
+  const size_t k = 512;
+  const float alpha = 0.7f;
+  const float beta = 1.0f;
+  const size_t a_ld = k;
+  const size_t b_ld = n;
+  const size_t c_ld = n;
+
+  // Initializes the OpenCL platform. The count starts at zero so that a failing query is treated
+  // as "no platforms" instead of reading an uninitialised value.
+  cl_uint num_platforms = 0;
+  clGetPlatformIDs(0, NULL, &num_platforms);
+  if (num_platforms <= platform_id) {
+    printf("OpenCL platform %lu is not available\n", (unsigned long)platform_id);
+    return 1;
+  }
+  cl_platform_id* platforms = (cl_platform_id*)malloc(num_platforms*sizeof(cl_platform_id));
+  clGetPlatformIDs(num_platforms, platforms, NULL);
+  cl_platform_id platform = platforms[platform_id];
+
+  // Initializes the OpenCL device (note: example for GPU devices only). As above, the count is
+  // checked before indexing so that a platform without GPUs does not cause an out-of-bounds read.
+  cl_uint num_devices = 0;
+  clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &num_devices);
+  if (num_devices <= device_id) {
+    printf("OpenCL GPU device %lu is not available\n", (unsigned long)device_id);
+    free(platforms);
+    return 1;
+  }
+  cl_device_id* devices = (cl_device_id*)malloc(num_devices*sizeof(cl_device_id));
+  clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, num_devices, devices, NULL);
+  cl_device_id device = devices[device_id];
+
+  // Creates the OpenCL context, queue, and an event
+  cl_context context = clCreateContext(NULL, 1, &device, NULL, NULL, NULL);
+  cl_command_queue queue = clCreateCommandQueue(context, device, 0, NULL);
+  cl_event event = NULL;
+
+  // Populate host matrices with some example data
+  float* host_a = (float*)malloc(sizeof(float)*m*k);
+  float* host_b = (float*)malloc(sizeof(float)*n*k);
+  float* host_c = (float*)malloc(sizeof(float)*m*n);
+  for (size_t i=0; i<m*k; ++i) { host_a[i] = 12.193f; }
+  for (size_t i=0; i<n*k; ++i) { host_b[i] = -8.199f; }
+  for (size_t i=0; i<m*n; ++i) { host_c[i] = 0.0f; }
+
+  // Copy the matrices to the device (blocking writes, so the host data is consumed on return)
+  cl_mem device_a = clCreateBuffer(context, CL_MEM_READ_WRITE, m*k*sizeof(float), NULL, NULL);
+  cl_mem device_b = clCreateBuffer(context, CL_MEM_READ_WRITE, n*k*sizeof(float), NULL, NULL);
+  cl_mem device_c = clCreateBuffer(context, CL_MEM_READ_WRITE, m*n*sizeof(float), NULL, NULL);
+  clEnqueueWriteBuffer(queue, device_a, CL_TRUE, 0, m*k*sizeof(float), host_a, 0, NULL, NULL);
+  clEnqueueWriteBuffer(queue, device_b, CL_TRUE, 0, n*k*sizeof(float), host_b, 0, NULL, NULL);
+  clEnqueueWriteBuffer(queue, device_c, CL_TRUE, 0, m*n*sizeof(float), host_c, 0, NULL, NULL);
+
+  // Call the SGEMM routine.
+  StatusCode status = CLBlastSgemm(kRowMajor, kNo, kNo,
+                                   m, n, k,
+                                   alpha,
+                                   device_a, 0, a_ld,
+                                   device_b, 0, b_ld,
+                                   beta,
+                                   device_c, 0, c_ld,
+                                   &queue, &event);
+
+  // Wait for completion and release the event. This is done only when the routine reported
+  // success (status 0): on failure 'event' may still be NULL and must not be waited on.
+  if (status == 0) {
+    clWaitForEvents(1, &event);
+    clReleaseEvent(event);
+  }
+
+  // Example completed. See "clblast_c.h" for status codes (0 -> success).
+  printf("Completed with status %d\n", status);
+
+  // Clean-up
+  free(platforms);
+  free(devices);
+  free(host_a);
+  free(host_b);
+  free(host_c);
+  clReleaseMemObject(device_a);
+  clReleaseMemObject(device_b);
+  clReleaseMemObject(device_c);
+  clReleaseCommandQueue(queue);
+  clReleaseContext(context);
+  return 0;
+}
+
+// =================================================================================================