summaryrefslogtreecommitdiff
path: root/samples
diff options
context:
space:
mode:
authorCedric Nugteren <web@cedricnugteren.nl>2016-04-29 20:33:19 +0200
committerCedric Nugteren <web@cedricnugteren.nl>2016-04-29 20:33:19 +0200
commit4f528b1730df1ffda9d396030cfc4c4ddf0203fb (patch)
tree3b1a7c29d3a08fb5963c5bf79d50c13e71cb3149 /samples
parentd9b21d7f4920b115d3fe7f2e3cce1f89eb762c10 (diff)
Added sample C programs for the SASUM and DGEMV routines
Diffstat (limited to 'samples')
-rw-r--r--samples/dgemv.c106
-rw-r--r--samples/sasum.c96
-rw-r--r--samples/sgemm.c8
-rw-r--r--samples/sgemm.cc6
4 files changed, 209 insertions, 7 deletions
diff --git a/samples/dgemv.c b/samples/dgemv.c
new file mode 100644
index 00000000..6ea0deb0
--- /dev/null
+++ b/samples/dgemv.c
@@ -0,0 +1,106 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file demonstrates the use of the DGEMV routine. It is pure C99 and demonstrates the use of
+// the C API to the CLBlast library.
+//
+// Note that this example is meant for illustration purposes only. CLBlast provides other programs
+// for performance benchmarking ('client_xxxxx') and for correctness testing ('test_xxxxx').
+//
+// =================================================================================================
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+// Includes the CLBlast library (C interface)
+#include <clblast_c.h>
+
+// =================================================================================================
+
+// Example use of the double-precision routine DGEMV (BLAS: y = alpha * A * x + beta * y)
+int main(void) {
+
+ // OpenCL platform/device settings: indices into the platform and device lists queried below
+ const size_t platform_id = 0;
+ const size_t device_id = 0;
+
+ // Example DGEMV arguments: A holds m*n values (leading dimension n), x holds n, y holds m
+ const size_t m = 128;
+ const size_t n = 289;
+ const double alpha = 0.7;
+ const double beta = 0.0;
+ const size_t a_ld = n;
+
+ // Initializes the OpenCL platform (the first call queries the count, the second fills the list)
+ cl_uint num_platforms;
+ clGetPlatformIDs(0, NULL, &num_platforms);
+ cl_platform_id* platforms = (cl_platform_id*)malloc(num_platforms*sizeof(cl_platform_id));
+ clGetPlatformIDs(num_platforms, platforms, NULL);
+ cl_platform_id platform = platforms[platform_id];
+
+ // Initializes the OpenCL device (same two-step query, for all device types of the platform)
+ cl_uint num_devices;
+ clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices);
+ cl_device_id* devices = (cl_device_id*)malloc(num_devices*sizeof(cl_device_id));
+ clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, num_devices, devices, NULL);
+ cl_device_id device = devices[device_id];
+
+ // Creates the OpenCL context, queue, and an event (left NULL until the routine sets it)
+ cl_context context = clCreateContext(NULL, 1, &device, NULL, NULL, NULL);
+ cl_command_queue queue = clCreateCommandQueue(context, device, 0, NULL);
+ cl_event event = NULL;
+
+ // Populate host data structures with some example data (constant fill values)
+ double* host_a = (double*)malloc(sizeof(double)*m*n);
+ double* host_x = (double*)malloc(sizeof(double)*n);
+ double* host_y = (double*)malloc(sizeof(double)*m);
+ for (size_t i=0; i<m*n; ++i) { host_a[i] = 12.193; }
+ for (size_t i=0; i<n; ++i) { host_x[i] = -8.199; }
+ for (size_t i=0; i<m; ++i) { host_y[i] = 0.0; }
+
+ // Copy the data-structures to the device (blocking writes: CL_TRUE)
+ cl_mem device_a = clCreateBuffer(context, CL_MEM_READ_WRITE, m*n*sizeof(double), NULL, NULL);
+ cl_mem device_x = clCreateBuffer(context, CL_MEM_READ_WRITE, n*sizeof(double), NULL, NULL);
+ cl_mem device_y = clCreateBuffer(context, CL_MEM_READ_WRITE, m*sizeof(double), NULL, NULL);
+ clEnqueueWriteBuffer(queue, device_a, CL_TRUE, 0, m*n*sizeof(double), host_a, 0, NULL, NULL);
+ clEnqueueWriteBuffer(queue, device_x, CL_TRUE, 0, n*sizeof(double), host_x, 0, NULL, NULL);
+ clEnqueueWriteBuffer(queue, device_y, CL_TRUE, 0, m*sizeof(double), host_y, 0, NULL, NULL);
+
+ // Call the DGEMV routine: row-major A, not transposed, zero offsets, unit increments for x/y
+ StatusCode status = CLBlastDgemv(kRowMajor, kNo,
+ m, n,
+ alpha,
+ device_a, 0, a_ld,
+ device_x, 0, 1,
+ beta,
+ device_y, 0, 1,
+ &queue, &event);
+
+ // Wait for completion and release the event, only on success (0): it may be unset otherwise
+ if (status == 0) { clWaitForEvents(1, &event); clReleaseEvent(event); }
+
+ // Example completed. See "clblast_c.h" for status codes (0 -> success).
+ printf("Completed DGEMV with status %d\n", status);
+
+ // Clean-up: host allocations first, then the OpenCL objects created above
+ free(platforms);
+ free(devices);
+ free(host_a);
+ free(host_x);
+ free(host_y);
+ clReleaseMemObject(device_a);
+ clReleaseMemObject(device_x);
+ clReleaseMemObject(device_y);
+ clReleaseCommandQueue(queue);
+ clReleaseContext(context);
+ return 0;
+}
+
+// =================================================================================================
diff --git a/samples/sasum.c b/samples/sasum.c
new file mode 100644
index 00000000..3b20d301
--- /dev/null
+++ b/samples/sasum.c
@@ -0,0 +1,96 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file demonstrates the use of the SASUM routine. It is pure C99 and demonstrates the use of
+// the C API to the CLBlast library.
+//
+// Note that this example is meant for illustration purposes only. CLBlast provides other programs
+// for performance benchmarking ('client_xxxxx') and for correctness testing ('test_xxxxx').
+//
+// =================================================================================================
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+// Includes the CLBlast library (C interface)
+#include <clblast_c.h>
+
+// =================================================================================================
+
+// Example use of the single-precision routine SASUM (sum of absolute values of a vector)
+int main(void) {
+
+ // OpenCL platform/device settings: indices into the platform and device lists queried below
+ const size_t platform_id = 0;
+ const size_t device_id = 0;
+
+ // Example SASUM arguments: a vector of n elements, each set to input_value
+ const size_t n = 1000;
+ const float input_value = -1.5f;
+
+ // Initializes the OpenCL platform (the first call queries the count, the second fills the list)
+ cl_uint num_platforms;
+ clGetPlatformIDs(0, NULL, &num_platforms);
+ cl_platform_id* platforms = (cl_platform_id*)malloc(num_platforms*sizeof(cl_platform_id));
+ clGetPlatformIDs(num_platforms, platforms, NULL);
+ cl_platform_id platform = platforms[platform_id];
+
+ // Initializes the OpenCL device (same two-step query, for all device types of the platform)
+ cl_uint num_devices;
+ clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices);
+ cl_device_id* devices = (cl_device_id*)malloc(num_devices*sizeof(cl_device_id));
+ clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, num_devices, devices, NULL);
+ cl_device_id device = devices[device_id];
+
+ // Creates the OpenCL context, queue, and an event (left NULL until the routine sets it)
+ cl_context context = clCreateContext(NULL, 1, &device, NULL, NULL, NULL);
+ cl_command_queue queue = clCreateCommandQueue(context, device, 0, NULL);
+ cl_event event = NULL;
+
+ // Populate host data structures with some example data (the output is a single float)
+ float* host_input = (float*)malloc(sizeof(float)*n);
+ float* host_output = (float*)malloc(sizeof(float)*1);
+ for (size_t i=0; i<n; ++i) { host_input[i] = input_value; }
+ for (size_t i=0; i<1; ++i) { host_output[i] = 0.0f; }
+
+ // Copy the data-structures to the device (blocking writes: CL_TRUE)
+ cl_mem device_input = clCreateBuffer(context, CL_MEM_READ_WRITE, n*sizeof(float), NULL, NULL);
+ cl_mem device_output = clCreateBuffer(context, CL_MEM_READ_WRITE, 1*sizeof(float), NULL, NULL);
+ clEnqueueWriteBuffer(queue, device_input, CL_TRUE, 0, n*sizeof(float), host_input, 0, NULL, NULL);
+ clEnqueueWriteBuffer(queue, device_output, CL_TRUE, 0, 1*sizeof(float), host_output, 0, NULL, NULL);
+
+ // Call the SASUM routine: output buffer first, then the input with zero offset and increment 1
+ StatusCode status = CLBlastSasum(n,
+ device_output, 0,
+ device_input, 0, 1,
+ &queue, &event);
+
+ // Wait for completion and release the event, only on success (0): it may be unset otherwise
+ if (status == 0) { clWaitForEvents(1, &event); clReleaseEvent(event); }
+
+ // Copies the result back to the host (blocking read of the single output value)
+ clEnqueueReadBuffer(queue, device_output, CL_TRUE, 0, 1*sizeof(float), host_output, 0, NULL, NULL);
+
+ // Example completed. See "clblast_c.h" for status codes (0 -> success). Note: n is a size_t (%zu).
+ printf("Completed SASUM with status %d: %zu * |%.1lf| = %.1lf\n", status, n, input_value, host_output[0]);
+
+ // Clean-up: host allocations first, then the OpenCL objects created above
+ free(platforms);
+ free(devices);
+ free(host_input);
+ free(host_output);
+ clReleaseMemObject(device_input);
+ clReleaseMemObject(device_output);
+ clReleaseCommandQueue(queue);
+ clReleaseContext(context);
+ return 0;
+}
+
+// =================================================================================================
diff --git a/samples/sgemm.c b/samples/sgemm.c
index d528db0a..79f30c83 100644
--- a/samples/sgemm.c
+++ b/samples/sgemm.c
@@ -48,11 +48,11 @@ int main(void) {
clGetPlatformIDs(num_platforms, platforms, NULL);
cl_platform_id platform = platforms[platform_id];
- // Initializes the OpenCL device (note: example for GPU devices only)
+ // Initializes the OpenCL device
cl_uint num_devices;
- clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &num_devices);
+ clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices);
cl_device_id* devices = (cl_device_id*)malloc(num_devices*sizeof(cl_device_id));
- clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, num_devices, devices, NULL);
+ clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, num_devices, devices, NULL);
cl_device_id device = devices[device_id];
// Creates the OpenCL context, queue, and an event
@@ -90,7 +90,7 @@ int main(void) {
clWaitForEvents(1, &event);
// Example completed. See "clblast_c.h" for status codes (0 -> success).
- printf("Completed with status %d\n", status);
+ printf("Completed SGEMM with status %d\n", status);
// Clean-up
free(platforms);
diff --git a/samples/sgemm.cc b/samples/sgemm.cc
index 2659d36c..5fe7490a 100644
--- a/samples/sgemm.cc
+++ b/samples/sgemm.cc
@@ -52,9 +52,9 @@ int main() {
if (platforms.size() == 0 || platform_id >= platforms.size()) { return 1; }
auto platform = platforms[platform_id];
- // Initializes the OpenCL device (note: example for GPU devices only)
+ // Initializes the OpenCL device
auto devices = std::vector<cl::Device>();
- platform.getDevices(CL_DEVICE_TYPE_GPU, &devices);
+ platform.getDevices(CL_DEVICE_TYPE_ALL, &devices);
if (devices.size() == 0 || device_id >= devices.size()) { return 1; }
auto device = devices[device_id];
@@ -100,7 +100,7 @@ int main() {
auto time_ms = std::chrono::duration<double,std::milli>(elapsed_time).count();
// Example completed. See "clblast.h" for status codes (0 -> success).
- printf("Completed in %.3lf ms with status %d\n", time_ms, status);
+ printf("Completed SGEMM in %.3lf ms with status %d\n", time_ms, status);
return 0;
}