Initial commit of preview version

author: CNugteren <web@cedricnugteren.nl> 2015-05-30 12:30:43 +0200
committer: CNugteren <web@cedricnugteren.nl> 2015-05-30 12:30:43 +0200
commit: bc5a341dfe591946e925db315fc7d8c0c25c2938 (patch)
tree: b216ab5eee4863e3807d92b5ddd19fa22197ed22 /samples
parent: c7b054ea6747039f4405fd93da6e924f3e5c7f4b (diff)
1 files changed, 108 insertions, 0 deletions
diff --git a/samples/sgemm.cc b/samples/sgemm.cc
new file mode 100644
index 00000000..f4015278
--- /dev/null
+++ b/samples/sgemm.cc
@@ -0,0 +1,108 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+//   Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file demonstrates the use of the SGEMM routine. It is a stand-alone example, but it does
+// require the Khronos C++ OpenCL API header file (not included). The example uses C++ features,
+// but CLBlast can also be used with the regular C-style OpenCL API.
+//
+// Note that this example is meant for illustration purposes only. CLBlast provides other programs
+// for performance benchmarking ('client_xxxxx') and for correctness testing ('test_xxxxx').
+//
+// =================================================================================================
+
+#include <cstdio>
+#include <chrono>
+#include <vector>
+
+// Includes the C++ OpenCL API. If not yet available, it can be found here:
+// https://www.khronos.org/registry/cl/api/1.1/cl.hpp
+#include <cl.hpp>
+
+// Includes the CLBlast library
+#include <clblast.h>
+
+// =================================================================================================
+
+// Example use of the single-precision Xgemm routine SGEMM. Runs one SGEMM on the chosen GPU,
+// prints the elapsed time and the status code, and returns 0 on success or 1 if the platform or
+// device is unavailable or the routine reports a non-zero status.
+int main() {
+  // OpenCL platform/device settings (unsigned, to match the size() bounds checks below)
+  const auto platform_id = size_t{0};
+  const auto device_id = size_t{0};
+
+  // Example SGEMM arguments
+  const size_t m = 128;
+  const size_t n = 64;
+  const size_t k = 512;
+  const float alpha = 0.7f;
+  const float beta = 1.0f;
+  const auto a_ld = k;
+  const auto b_ld = n;
+  const auto c_ld = n;
+
+  // Initializes the OpenCL platform (an empty list also fails this bounds check)
+  auto platforms = std::vector<cl::Platform>();
+  cl::Platform::get(&platforms);
+  if (platform_id >= platforms.size()) { return 1; }
+  auto platform = platforms[platform_id];
+
+  // Initializes the OpenCL device (note: example for GPU devices only)
+  auto devices = std::vector<cl::Device>();
+  platform.getDevices(CL_DEVICE_TYPE_GPU, &devices);
+  if (device_id >= devices.size()) { return 1; }
+  auto device = devices[device_id];
+
+  // Creates the OpenCL context, queue, and an event
+  auto context = cl::Context({device});
+  auto queue = cl::CommandQueue(context, device);
+  auto event = cl::Event();
+
+  // Populate host matrices with some example data
+  auto host_a = std::vector<float>(m*k, 12.193f);
+  auto host_b = std::vector<float>(n*k, -8.199f);
+  auto host_c = std::vector<float>(m*n, 0.0f);
+
+  // Copy the matrices to the device (blocking writes)
+  auto device_a = cl::Buffer(context, CL_MEM_READ_WRITE, host_a.size()*sizeof(float));
+  auto device_b = cl::Buffer(context, CL_MEM_READ_WRITE, host_b.size()*sizeof(float));
+  auto device_c = cl::Buffer(context, CL_MEM_READ_WRITE, host_c.size()*sizeof(float));
+  queue.enqueueWriteBuffer(device_a, CL_TRUE, 0, host_a.size()*sizeof(float), host_a.data());
+  queue.enqueueWriteBuffer(device_b, CL_TRUE, 0, host_b.size()*sizeof(float), host_b.data());
+  queue.enqueueWriteBuffer(device_c, CL_TRUE, 0, host_c.size()*sizeof(float), host_c.data());
+
+  // Start the timer
+  auto start_time = std::chrono::steady_clock::now();
+
+  // Call the SGEMM routine. Note that the type of alpha and beta (float) determines the precision.
+  // The event handle is taken by reference: a by-value copy would never be seen by 'event'.
+  auto queue_plain = queue();
+  auto &event_plain = event();
+  auto status = Gemm(clblast::Layout::kRowMajor,
+                     clblast::Transpose::kNo, clblast::Transpose::kNo,
+                     m, n, k,
+                     alpha,
+                     device_a(), 0, a_ld,
+                     device_b(), 0, b_ld,
+                     beta,
+                     device_c(), 0, c_ld,
+                     &queue_plain, &event_plain);
+
+  // Record the execution time (waits on the queue, so an unset event cannot skew the timing)
+  queue.finish();
+  auto elapsed_time = std::chrono::steady_clock::now() - start_time;
+  auto time_ms = std::chrono::duration<double,std::milli>(elapsed_time).count();
+
+  // Example completed. See "clblast.h" for status codes (0 -> success).
+  const auto status_code = static_cast<int>(status);
+  printf("Completed in %.3lf ms with status %d\n", time_ms, status_code);
+  return (status_code == 0) ? 0 : 1;
+}
+
+// =================================================================================================
author	CNugteren <web@cedricnugteren.nl>	2015-05-30 12:30:43 +0200
committer	CNugteren <web@cedricnugteren.nl>	2015-05-30 12:30:43 +0200
commit	bc5a341dfe591946e925db315fc7d8c0c25c2938 (patch)
tree	b216ab5eee4863e3807d92b5ddd19fa22197ed22 /samples
parent	c7b054ea6747039f4405fd93da6e924f3e5c7f4b (diff)