6 files changed, 971 insertions, 0 deletions
diff --git a/src/utilities/buffer_test.hpp b/src/utilities/buffer_test.hpp
new file mode 100644
index 00000000..9a23e0b7
--- /dev/null
+++ b/src/utilities/buffer_test.hpp
@@ -0,0 +1,113 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+//   Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements the tests for the OpenCL buffers (matrices and vectors). These tests are
+// templated and thus header-only.
+//
+// =================================================================================================
+
+#ifndef CLBLAST_BUFFER_TEST_H_
+#define CLBLAST_BUFFER_TEST_H_
+
+#include "clblast.h"
+
+namespace clblast {
+// =================================================================================================
+
+// Checks matrix 'A': throws a BLASError for a too-small 'ld' or a too-small buffer
+template <typename T>
+void TestMatrixA(const size_t one, const size_t two, const Buffer<T> &buffer,
+                 const size_t offset, const size_t ld) {
+  if (one > ld) { throw BLASError(StatusCode::kInvalidLeadDimA); }
+  try {
+    const auto num_elements = ld * (two - 1) + one + offset; // elements needed from the buffer start
+    if (num_elements * sizeof(T) > buffer.GetSize()) { throw BLASError(StatusCode::kInsufficientMemoryA); }
+  } catch (const Error<std::runtime_error> &e) { throw BLASError(StatusCode::kInvalidMatrixA, e.what()); }
+}
+
+// Checks matrix 'B': throws a BLASError for a too-small 'ld' or a too-small buffer
+template <typename T>
+void TestMatrixB(const size_t one, const size_t two, const Buffer<T> &buffer,
+                 const size_t offset, const size_t ld) {
+  if (one > ld) { throw BLASError(StatusCode::kInvalidLeadDimB); }
+  try {
+    const auto num_elements = ld * (two - 1) + one + offset; // elements needed from the buffer start
+    if (num_elements * sizeof(T) > buffer.GetSize()) { throw BLASError(StatusCode::kInsufficientMemoryB); }
+  } catch (const Error<std::runtime_error> &e) { throw BLASError(StatusCode::kInvalidMatrixB, e.what()); }
+}
+
+// Checks matrix 'C': throws a BLASError for a too-small 'ld' or a too-small buffer
+template <typename T>
+void TestMatrixC(const size_t one, const size_t two, const Buffer<T> &buffer,
+                 const size_t offset, const size_t ld) {
+  if (one > ld) { throw BLASError(StatusCode::kInvalidLeadDimC); }
+  try {
+    const auto num_elements = ld * (two - 1) + one + offset; // elements needed from the buffer start
+    if (num_elements * sizeof(T) > buffer.GetSize()) { throw BLASError(StatusCode::kInsufficientMemoryC); }
+  } catch (const Error<std::runtime_error> &e) { throw BLASError(StatusCode::kInvalidMatrixC, e.what()); }
+}
+
+// Checks matrix 'AP', which takes n*(n+1)/2 elements: throws a BLASError for a too-small buffer
+template <typename T>
+void TestMatrixAP(const size_t n, const Buffer<T> &buffer, const size_t offset) {
+  try {
+    const auto num_elements = ((n * (n + 1)) / 2) + offset;
+    if (num_elements * sizeof(T) > buffer.GetSize()) { throw BLASError(StatusCode::kInsufficientMemoryA); }
+  } catch (const Error<std::runtime_error> &e) { throw BLASError(StatusCode::kInvalidMatrixA, e.what()); }
+}
+
+// =================================================================================================
+
+// Checks vector 'X': throws a BLASError for a zero increment or a too-small buffer
+template <typename T>
+void TestVectorX(const size_t n, const Buffer<T> &buffer, const size_t offset,
+                 const size_t inc) {
+  if (inc == 0) { throw BLASError(StatusCode::kInvalidIncrementX); }
+  try {
+    const auto num_elements = (n - 1) * inc + 1 + offset; // elements needed from the buffer start
+    if (num_elements * sizeof(T) > buffer.GetSize()) { throw BLASError(StatusCode::kInsufficientMemoryX); }
+  } catch (const Error<std::runtime_error> &e) { throw BLASError(StatusCode::kInvalidVectorX, e.what()); }
+}
+
+// Checks vector 'Y': throws a BLASError for a zero increment or a too-small buffer
+template <typename T>
+void TestVectorY(const size_t n, const Buffer<T> &buffer, const size_t offset,
+                 const size_t inc) {
+  if (inc == 0) { throw BLASError(StatusCode::kInvalidIncrementY); }
+  try {
+    const auto num_elements = (n - 1) * inc + 1 + offset; // elements needed from the buffer start
+    if (num_elements * sizeof(T) > buffer.GetSize()) { throw BLASError(StatusCode::kInsufficientMemoryY); }
+  } catch (const Error<std::runtime_error> &e) { throw BLASError(StatusCode::kInvalidVectorY, e.what()); }
+}
+
+// =================================================================================================
+
+// Checks the 'scalar' vector: throws a BLASError if the buffer is too small for n + offset elements
+template <typename T>
+void TestVectorScalar(const size_t n, const Buffer<T> &buffer, const size_t offset) {
+  try {
+    const auto num_elements = n + offset;
+    if (num_elements * sizeof(T) > buffer.GetSize()) { throw BLASError(StatusCode::kInsufficientMemoryScalar); }
+  } catch (const Error<std::runtime_error> &e) { throw BLASError(StatusCode::kInvalidVectorScalar, e.what()); }
+}
+
+// Checks the 'index' vector; note that failures are reported with the 'Scalar' status codes
+template <typename T>
+void TestVectorIndex(const size_t n, const Buffer<T> &buffer, const size_t offset) {
+  try {
+    const auto num_elements = n + offset;
+    if (num_elements * sizeof(T) > buffer.GetSize()) { throw BLASError(StatusCode::kInsufficientMemoryScalar); }
+  } catch (const Error<std::runtime_error> &e) { throw BLASError(StatusCode::kInvalidVectorScalar, e.what()); }
+}
+
+// =================================================================================================
+} // namespace clblast
+
+// CLBLAST_BUFFER_TEST_H_
+#endif
diff --git a/src/utilities/clblast_exceptions.cpp b/src/utilities/clblast_exceptions.cpp
new file mode 100644
index 00000000..96f10860
--- /dev/null
+++ b/src/utilities/clblast_exceptions.cpp
@@ -0,0 +1,95 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+//   Ivan Shapovalov <intelfx@intelfx.name>
+//
+// This file implements the exception hierarchy for CLBlast. It contains classes for exceptions
+// generated by different parts of CLBlast (e.g. OpenCL API calls, internal logic, semantic BLAS
+// errors).
+//
+// =================================================================================================
+
+#include "utilities/clblast_exceptions.hpp"
+
+namespace {
+// =================================================================================================
+
+std::string MakeReason(const std::string &reason, const std::string &subreason) {
+  // Without a sub-reason the reason is returned as-is; otherwise " (<subreason>)" is appended
+  if (subreason.empty()) {
+    return reason;
+  }
+  return reason + " (" + subreason + ")";
+}
+
+} // anonymous namespace
+
+namespace clblast {
+// =================================================================================================
+
+// Forwards the status, the sub-reason and the text "BLAS error: <code>[ (<subreason>)]" to ErrorCode
+BLASError::BLASError(StatusCode status, const std::string &subreason):
+    ErrorCode(status, subreason,
+              "BLAS error: " + MakeReason(std::to_string(static_cast<int>(status)), subreason)) {
+}
+
+// Forwards the status, the sub-reason and the text "<code>[ (<subreason>)]" to ErrorCode
+RuntimeErrorCode::RuntimeErrorCode(StatusCode status, const std::string &subreason):
+    ErrorCode(status, subreason,
+              MakeReason(std::to_string(static_cast<int>(status)), subreason)) {
+}
+
+// =================================================================================================
+
+// Converts the exception that is currently being handled into a StatusCode. This needs to be
+// called from within a catch-handler: the active exception is re-thrown below to dispatch on its
+// type. Exception types without a handler here are not caught and propagate to the caller.
+// Apart from BLASError, the message of the exception is printed to stderr as well.
+StatusCode DispatchException()
+{
+  // Prints an error message to stderr, unless there is none
+  const auto print = [](const char *text) { if (text) { fprintf(stderr, "CLBlast: %s\n", text); } };
+
+  try {
+    throw;
+  } catch (BLASError &error) {
+    // no message is printed for invalid argument errors
+    return error.status();
+  } catch (CLError &error) {
+    print(error.what());
+    return static_cast<StatusCode>(error.status());
+  } catch (RuntimeErrorCode &error) {
+    print(error.what());
+    return error.status();
+  } catch (Error<std::runtime_error> &error) {
+    // no status is read from this exception: it maps onto the generic unknown-error code
+    print(error.what());
+    return StatusCode::kUnknownError;
+  }
+}
+
+// =================================================================================================
+
+StatusCode DispatchExceptionForC()
+{
+  // Re-throws the exception that is currently being handled to get hold of its message. Whatever
+  // its type, the message is printed to stderr and the same fixed status code is returned.
+  try {
+    throw;
+  } catch (std::exception &error) {
+    fprintf (stderr, "CLBlast (unexpected): %s\n", error.what());
+  } catch (...) {
+    // not derived from std::exception, so there is no message to query
+    fprintf (stderr, "CLBlast (unexpected): %s\n", "unknown exception");
+  }
+
+  return StatusCode::kUnexpectedError;
+}
+
+// =================================================================================================
+
+} // namespace clblast
diff --git a/src/utilities/clblast_exceptions.hpp b/src/utilities/clblast_exceptions.hpp
new file mode 100644
index 00000000..89f5e761
--- /dev/null
+++ b/src/utilities/clblast_exceptions.hpp
@@ -0,0 +1,50 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+//   Ivan Shapovalov <intelfx@intelfx.name>
+//
+// This file implements the exception hierarchy for CLBlast. It contains classes for exceptions
+// generated by different parts of CLBlast (e.g. OpenCL API calls, internal logic, semantic BLAS
+// errors).
+//
+// =================================================================================================
+
+#ifndef CLBLAST_EXCEPTIONS_H_
+#define CLBLAST_EXCEPTIONS_H_
+
+#include "clblast.h"
+#include "clpp11.hpp"
+
+namespace clblast {
+// =================================================================================================
+
+// Exception for a semantic error in BLAS function arguments: a StatusCode plus optional sub-reason
+class PUBLIC_API BLASError : public ErrorCode<Error<std::invalid_argument>, StatusCode> {
+ public:
+  explicit BLASError(StatusCode status, const std::string &subreason = std::string());
+};
+// =================================================================================================
+
+// Exception for a runtime error from internal logic: a StatusCode plus optional sub-reason
+class PUBLIC_API RuntimeErrorCode : public ErrorCode<RuntimeError, StatusCode> {
+ public:
+  explicit RuntimeErrorCode(StatusCode status, const std::string &subreason = std::string());
+};
+
+// =================================================================================================
+
+// Handles (most of the) runtime exceptions and converts them to StatusCode
+StatusCode DispatchException();
+
+// Handles remaining exceptions and converts them to StatusCode::kUnexpectedError
+StatusCode DispatchExceptionForC();
+
+// =================================================================================================
+
+} // namespace clblast
+
+#endif // CLBLAST_EXCEPTIONS_H_
diff --git a/src/utilities/msvc.hpp b/src/utilities/msvc.hpp
new file mode 100644
index 00000000..a45105df
--- /dev/null
+++ b/src/utilities/msvc.hpp
@@ -0,0 +1,39 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+//   Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file provides macros and definitions to make compilation work on Microsoft Visual Studio,
+// in particular for versions older than 2015 with limited C++11 support.
+// MSVC++ 14.0 _MSC_VER == 1900 (Visual Studio 2015)
+// MSVC++ 12.0 _MSC_VER == 1800 (Visual Studio 2013)
+// MSVC++ 11.0 _MSC_VER == 1700 (Visual Studio 2012)
+// MSVC++ 10.0 _MSC_VER == 1600 (Visual Studio 2010)
+// MSVC++ 9.0  _MSC_VER == 1500 (Visual Studio 2008)
+//
+// =================================================================================================
+
+#ifndef CLBLAST_MSVC_HPP_
+#define CLBLAST_MSVC_HPP_
+
+namespace clblast {
+// =================================================================================================
+#ifdef _MSC_VER
+
+// No support for constexpr prior to 2015. Note that this only works with constants, not with
+// constexpr functions (unused in this project).
+#if _MSC_VER < 1900
+#define constexpr const
+#endif
+
+// _MSC_VER
+#endif
+// =================================================================================================
+} // namespace clblast
+
+// CLBLAST_MSVC_HPP_
+#endif
diff --git a/src/utilities/utilities.cpp b/src/utilities/utilities.cpp
new file mode 100644
index 00000000..b4a18311
--- /dev/null
+++ b/src/utilities/utilities.cpp
@@ -0,0 +1,409 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+//   Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements the common (test) utility functions.
+//
+// =================================================================================================
+
+#include "utilities/utilities.hpp"
+
+#include <string>
+#include <vector>
+#include <chrono>
+#include <random>
+#include <iomanip>
+
+namespace clblast {
+// =================================================================================================
+
+// Returns a scalar with a default value: 2 for the real-valued data-types
+template <typename T>
+T GetScalar() {
+  return T(2.0);
+}
+template float GetScalar<float>();
+template double GetScalar<double>();
+
+// Half-precision specialization of the above: the value 2 passed through FloatToHalf
+template <>
+half GetScalar<half>() {
+  return FloatToHalf(2.0f);
+}
+
+// Complex specializations of the above: a real part of 2 and an imaginary part of 0.5
+template <>
+float2 GetScalar<float2>() {
+  return float2(2.0f, 0.5f);
+}
+template <>
+double2 GetScalar<double2>() {
+  return double2(2.0, 0.5);
+}
+
+// Returns the constant 1 in the requested real-valued data-type
+template <typename T>
+T ConstantOne() {
+  return T(1.0);
+}
+template float ConstantOne<float>();
+template double ConstantOne<double>();
+
+// Half-precision specialization of the above: the value 1 passed through FloatToHalf
+template <>
+half ConstantOne<half>() {
+  return FloatToHalf(1.0f);
+}
+
+// Complex specializations of the above: a real part of 1 and an imaginary part of 0
+template <>
+float2 ConstantOne<float2>() {
+  return float2(1.0f, 0.0f);
+}
+template <>
+double2 ConstantOne<double2>() {
+  return double2(1.0, 0.0);
+}
+
+// =================================================================================================
+
+// Converts a value to a string through std::to_string; compiled for the four types listed below
+template <typename T>
+std::string ToString(T value) {
+  return std::to_string(value);
+}
+template std::string ToString<int>(int value);
+template std::string ToString<size_t>(size_t value);
+template std::string ToString<float>(float value);
+template std::string ToString<double>(double value);
+
+// If not possible directly: complex data-types are formatted as "<real>+<imag>i", in which both
+// parts are streamed with a precision of 2 (the precision set on the stream stays in effect for
+// the imaginary part as well)
+template <>
+std::string ToString(float2 value) {
+  std::ostringstream stream;
+  stream << std::setprecision(2) << value.real() << "+" << value.imag() << "i";
+  return stream.str();
+}
+template <>
+std::string ToString(double2 value) {
+  std::ostringstream stream;
+  stream << std::setprecision(2) << value.real() << "+" << value.imag() << "i";
+  return stream.str();
+}
+
+// If not possible directly: half-precision is printed through its HalfToFloat conversion
+template <> std::string ToString(half value) {
+  const auto converted = HalfToFloat(value);
+  return std::to_string(converted);
+}
+
+// If not possible directly: special cases for CLBlast data-types, printed as "<int> (<name>)"
+template <> std::string ToString(Layout value) {
+  switch(value) {
+    case Layout::kRowMajor: return ToString(static_cast<int>(value))+" (row-major)";
+    case Layout::kColMajor: return ToString(static_cast<int>(value))+" (col-major)";
+  }
+  return ToString(static_cast<int>(value))+" (unknown)"; // out-of-range value, e.g. user input
+}
+template <> std::string ToString(Transpose value) { // printed as "<int> (<name>)"
+  switch(value) {
+    case Transpose::kNo: return ToString(static_cast<int>(value))+" (regular)";
+    case Transpose::kYes: return ToString(static_cast<int>(value))+" (transposed)";
+    case Transpose::kConjugate: return ToString(static_cast<int>(value))+" (conjugate)";
+  }
+  return ToString(static_cast<int>(value))+" (unknown)"; // out-of-range value, e.g. user input
+}
+template <> std::string ToString(Side value) { // printed as "<int> (<name>)"
+  switch(value) {
+    case Side::kLeft: return ToString(static_cast<int>(value))+" (left)";
+    case Side::kRight: return ToString(static_cast<int>(value))+" (right)";
+  }
+  return ToString(static_cast<int>(value))+" (unknown)"; // out-of-range value, e.g. user input
+}
+template <> std::string ToString(Triangle value) { // printed as "<int> (<name>)"
+  switch(value) {
+    case Triangle::kUpper: return ToString(static_cast<int>(value))+" (upper)";
+    case Triangle::kLower: return ToString(static_cast<int>(value))+" (lower)";
+  }
+  return ToString(static_cast<int>(value))+" (unknown)"; // out-of-range value, e.g. user input
+}
+template <> std::string ToString(Diagonal value) { // printed as "<int> (<name>)"
+  switch(value) {
+    case Diagonal::kUnit: return ToString(static_cast<int>(value))+" (unit)";
+    case Diagonal::kNonUnit: return ToString(static_cast<int>(value))+" (non-unit)";
+  }
+  return ToString(static_cast<int>(value))+" (unknown)"; // out-of-range value, e.g. user input
+}
+template <> std::string ToString(Precision value) { // printed as "<int> (<name>)"
+  switch(value) {
+    case Precision::kHalf: return ToString(static_cast<int>(value))+" (half)";
+    case Precision::kSingle: return ToString(static_cast<int>(value))+" (single)";
+    case Precision::kDouble: return ToString(static_cast<int>(value))+" (double)";
+    case Precision::kComplexSingle: return ToString(static_cast<int>(value))+" (complex-single)";
+    case Precision::kComplexDouble: return ToString(static_cast<int>(value))+" (complex-double)";
+  }
+  return ToString(static_cast<int>(value))+" (unknown)"; // out-of-range value, e.g. user input
+}
+
+// =================================================================================================
+
+// Helper for the below function: converts the textual argument to the value type. The generic
+// version parses an integer, the specializations parse a floating-point number. Note that complex
+// arguments are accepted as regular values, which are copied to both the real and imaginary parts.
+template <typename T>
+T ConvertArgument(const char* value) {
+  return static_cast<T>(std::stoi(value));
+}
+template size_t ConvertArgument(const char* value);
+
+template <> half ConvertArgument(const char* value) {
+  return FloatToHalf(static_cast<float>(std::stod(value)));
+}
+template <> float ConvertArgument(const char* value) {
+  return static_cast<float>(std::stod(value));
+}
+template <> double ConvertArgument(const char* value) {
+  return std::stod(value);
+}
+template <> float2 ConvertArgument(const char* value) {
+  const auto part = static_cast<float>(std::stod(value));
+  return float2(part, part);
+}
+template <> double2 ConvertArgument(const char* value) {
+  const auto part = std::stod(value);
+  return double2(part, part);
+}
+
+// Variant of "ConvertArgument" with default values: a null-pointer argument yields the default
+template <typename T>
+T ConvertArgument(const char* value, T default_value) {
+
+  if (value == nullptr) { return default_value; }
+  return ConvertArgument<T>(value);
+}
+template size_t ConvertArgument(const char* value, size_t default_value);
+
+// This function matches patterns in the form of "-option value" or "--option value". It returns a
+// default value in case the option is not found, or is the final argument and thus has no value.
+template <typename T>
+T GetArgument(const int argc, char **argv, std::string &help,
+              const std::string &option, const T default_value) {
+
+  // Parses the argument. Note that this supports both the given option (e.g. -device) and one with
+  // an extra dash in front (e.g. --device). Only the first occurrence is considered.
+  auto return_value = static_cast<T>(default_value);
+  for (int c=0; c<argc; ++c) {
+    const auto item = std::string{argv[c]};
+    if (item.compare("-"+option) == 0 || item.compare("--"+option) == 0) {
+      ++c;
+      if (c < argc) { return_value = ConvertArgument<T>(argv[c]); } // else: no value left to read
+      break;
+    }
+  }
+
+  // Updates the help message and returns
+  help += "    -"+option+" "+ToString(return_value)+" ";
+  help += (return_value == default_value) ? "[=default]\n" : "\n";
+  return return_value;
+}
+
+// Compiles the above function
+template int GetArgument<int>(const int, char **, std::string&, const std::string&, const int);
+template size_t GetArgument<size_t>(const int, char **, std::string&, const std::string&, const size_t);
+template half GetArgument<half>(const int, char **, std::string&, const std::string&, const half);
+template float GetArgument<float>(const int, char **, std::string&, const std::string&, const float);
+template double GetArgument<double>(const int, char **, std::string&, const std::string&, const double);
+template float2 GetArgument<float2>(const int, char **, std::string&, const std::string&, const float2);
+template double2 GetArgument<double2>(const int, char **, std::string&, const std::string&, const double2);
+template Layout GetArgument<Layout>(const int, char **, std::string&, const std::string&, const Layout);
+template Transpose GetArgument<Transpose>(const int, char **, std::string&, const std::string&, const Transpose);
+template Side GetArgument<Side>(const int, char **, std::string&, const std::string&, const Side);
+template Triangle GetArgument<Triangle>(const int, char **, std::string&, const std::string&, const Triangle);
+template Diagonal GetArgument<Diagonal>(const int, char **, std::string&, const std::string&, const Diagonal);
+template Precision GetArgument<Precision>(const int, char **, std::string&, const std::string&, const Precision);
+
+// =================================================================================================
+
+// Returns only the precision argument; the help text produced while parsing is thrown away
+Precision GetPrecision(const int argc, char *argv[], const Precision default_precision) {
+  std::string unused_help;
+  return GetArgument(argc, argv, unused_help, kArgPrecision, default_precision);
+}
+
+// =================================================================================================
+
+// Checks whether an argument is given. Returns true or false, and adds the outcome to 'help'.
+bool CheckArgument(const int argc, char *argv[], std::string &help,
+                   const std::string &option) {
+
+  // Scans all arguments for the option, either as given (e.g. -device) or with an extra dash in
+  // front (e.g. --device). The entry that directly follows a match is stepped over.
+  auto found = false;
+  for (int id=0; id<argc; ++id) {
+    const std::string current(argv[id]);
+    if (current == "-"+option || current == "--"+option) {
+      found = true;
+      ++id;
+    }
+  }
+
+  // Updates the help message and returns
+  help += "    -"+option+" ";
+  help += found ? "[true]\n" : "[false]\n";
+  return found;
+}
+
+// =================================================================================================
+
+// Returns a time-based seed ('std::random_device' doesn't always work): the system-clock count
+unsigned int GetRandomSeed() {
+  const auto since_epoch = std::chrono::system_clock::now().time_since_epoch();
+  return static_cast<unsigned int>(since_epoch.count());
+}
+
+// Fills a vector with samples from a uniform real distribution between the test-data limits,
+// drawn from a Mersenne-Twister generator that is initialised with the given seed
+template <typename T>
+void PopulateVector(std::vector<T> &vector, const unsigned int seed) {
+  std::mt19937 generator(seed);
+  std::uniform_real_distribution<T> distribution(static_cast<T>(kTestDataLowerLimit),
+                                                 static_cast<T>(kTestDataUpperLimit));
+  for (size_t id = 0; id < vector.size(); ++id) { vector[id] = distribution(generator); }
+}
+template void PopulateVector<float>(std::vector<float>&, const unsigned int);
+template void PopulateVector<double>(std::vector<double>&, const unsigned int);
+
+// Specialized versions of the above for complex data-types: per element, first the real and
+// then the imaginary part is drawn from the same distribution, which spans the test-data limits
+// in the precision of the parts
+template <>
+void PopulateVector(std::vector<float2> &vector, const unsigned int seed) {
+  std::mt19937 generator(seed);
+  std::uniform_real_distribution<float> distribution(static_cast<float>(kTestDataLowerLimit),
+                                                     static_cast<float>(kTestDataUpperLimit));
+  for (auto &value: vector) { value.real(distribution(generator)); value.imag(distribution(generator)); }
+}
+template <>
+void PopulateVector(std::vector<double2> &vector, const unsigned int seed) {
+  std::mt19937 generator(seed);
+  std::uniform_real_distribution<double> distribution(static_cast<double>(kTestDataLowerLimit),
+                                                      static_cast<double>(kTestDataUpperLimit));
+  for (auto &value: vector) { value.real(distribution(generator)); value.imag(distribution(generator)); }
+}
+
+// Specialized version of the above for half-precision: the samples are drawn as floats and
+// stored through FloatToHalf
+template <>
+void PopulateVector(std::vector<half> &vector, const unsigned int seed) {
+  std::mt19937 generator(seed);
+  std::uniform_real_distribution<float> distribution(static_cast<float>(kTestDataLowerLimit),
+                                                     static_cast<float>(kTestDataUpperLimit));
+  for (size_t id = 0; id < vector.size(); ++id) { vector[id] = FloatToHalf(distribution(generator)); }
+}
+
+// =================================================================================================
+
+// Element-wise half <-> float conversion; 'result' must already hold source.size() elements
+std::vector<float> HalfToFloatBuffer(const std::vector<half>& source) {
+  std::vector<float> converted(source.size());
+  for (size_t id = 0; id < source.size(); ++id) { converted[id] = HalfToFloat(source[id]); }
+  return converted;
+}
+void FloatToHalfBuffer(std::vector<half>& result, const std::vector<float>& source) {
+  for (size_t id = 0; id < source.size(); ++id) { result[id] = FloatToHalf(source[id]); }
+}
+
+// As above, but for OpenCL buffers: reads to a std::vector, converts, writes to a new buffer
+Buffer<float> HalfToFloatBuffer(const Buffer<half>& source, cl_command_queue queue_raw) {
+  const auto num_elements = source.GetSize() / sizeof(half);
+  auto queue = Queue(queue_raw);
+  auto context = queue.GetContext();
+  auto halfs_cpu = std::vector<half>(num_elements);
+  source.Read(queue, num_elements, halfs_cpu);
+  auto floats_cpu = HalfToFloatBuffer(halfs_cpu);
+  auto converted = Buffer<float>(context, num_elements);
+  converted.Write(queue, num_elements, floats_cpu);
+  return converted;
+}
+// Converts 'source' to half-precision via std::vectors; 'result' has to be created by the caller
+void FloatToHalfBuffer(Buffer<half>& result, const Buffer<float>& source, cl_command_queue queue_raw) {
+  const auto size = source.GetSize() / sizeof(float);
+  auto queue = Queue(queue_raw);
+  auto source_cpu = std::vector<float>(size);
+  source.Read(queue, size, source_cpu);
+  auto result_cpu = std::vector<half>(size);
+  FloatToHalfBuffer(result_cpu, source_cpu);
+  result.Write(queue, size, result_cpu);
+}
+
+// Converts a 'real' value into the value to be passed as kernel argument. Half-precision is not
+// supported as kernel argument and goes through HalfToFloat; other types are passed unchanged.
+template <> typename RealArg<half>::Type GetRealArg(const half value) { return HalfToFloat(value); }
+template <> typename RealArg<float>::Type GetRealArg(const float value) { return value; }
+template <> typename RealArg<double>::Type GetRealArg(const double value) { return value; }
+template <> typename RealArg<float2>::Type GetRealArg(const float2 value) { return value; }
+template <> typename RealArg<double2>::Type GetRealArg(const double2 value) { return value; }
+
+// =================================================================================================
+
+// Rounding functions: the division of 'x' by 'y' rounded up (well-defined for x == 0 as well)
+size_t CeilDiv(const size_t x, const size_t y) {
+  return (x / y) + (((x % y) != 0) ? 1 : 0);
+}
+size_t Ceil(const size_t x, const size_t y) { // 'x' rounded up to a multiple of 'y'
+  return CeilDiv(x,y)*y;
+}
+
+// Helper function to determine whether or not 'a' is a multiple of 'b' ('b' must be non-zero)
+bool IsMultiple(const size_t a, const size_t b) {
+  return (a % b) == 0;
+}
+
+// =================================================================================================
+
+// Returns the number of bytes taken by a single element of the given precision
+size_t GetBytes(const Precision precision) {
+  switch(precision) {
+    case Precision::kHalf: return 2;
+    case Precision::kSingle: return 4;
+    case Precision::kDouble: return 8;
+    case Precision::kComplexSingle: return 8;
+    case Precision::kComplexDouble: return 16;
+  } // NOTE(review): no return for values outside the listed enumerators - confirm callers
+}
+
+// Maps the template argument (a data-type) onto the matching Precision enumerator
+template <> Precision PrecisionValue<half>() { return Precision::kHalf; }
+template <> Precision PrecisionValue<float>() { return Precision::kSingle; }
+template <> Precision PrecisionValue<double>() { return Precision::kDouble; }
+template <> Precision PrecisionValue<float2>() { return Precision::kComplexSingle; }
+template <> Precision PrecisionValue<double2>() { return Precision::kComplexDouble; }
+
+// =================================================================================================
+
+// Returns false if this precision is not supported by the device (float/float2: always true)
+template <> bool PrecisionSupported<float>(const Device &) { return true; }
+template <> bool PrecisionSupported<float2>(const Device &) { return true; }
+template <> bool PrecisionSupported<double>(const Device &device) {
+  const auto capabilities = device.Capabilities();
+  return capabilities.find(kKhronosDoublePrecision) != std::string::npos;
+}
+template <> bool PrecisionSupported<double2>(const Device &device) {
+  const auto capabilities = device.Capabilities();
+  return capabilities.find(kKhronosDoublePrecision) != std::string::npos;
+}
+template <> bool PrecisionSupported<half>(const Device &device) {
+  const auto capabilities = device.Capabilities();
+  if (device.Name() == "Mali-T628") { return true; } // supports fp16 but not cl_khr_fp16 officially
+  return capabilities.find(kKhronosHalfPrecision) != std::string::npos;
+}
+
+// =================================================================================================
+} // namespace clblast
diff --git a/src/utilities/utilities.hpp b/src/utilities/utilities.hpp
new file mode 100644
index 00000000..9bc7401a
--- /dev/null
+++ b/src/utilities/utilities.hpp
@@ -0,0 +1,265 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+//   Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file provides declarations for the common (test) utility functions such as a command-line
+// argument parser. On top of this, it serves as the 'common' header, including the C++ OpenCL
+// wrapper. These utilities are not only used for CLBlast, but also included as part of the tuners,
+// the performance client and the correctness testers.
+//
+// =================================================================================================
+
+#ifndef CLBLAST_UTILITIES_H_
+#define CLBLAST_UTILITIES_H_
+
+#include <string>
+#include <functional>
+#include <complex>
+
+#include "clblast.h"
+#include "clblast_half.h"
+#include "clpp11.hpp"
+#include "utilities/clblast_exceptions.hpp"
+#include "utilities/msvc.hpp"
+
+namespace clblast {
+// =================================================================================================
+
+// Shorthands for complex data-types
+using float2 = std::complex<float>;
+using double2 = std::complex<double>;
+
+// Khronos OpenCL extensions
+const std::string kKhronosHalfPrecision = "cl_khr_fp16";
+const std::string kKhronosDoublePrecision = "cl_khr_fp64";
+
+// Caught an unknown error
+constexpr auto kUnknownError = -999;
+
+// =================================================================================================
+
+// The routine-specific arguments in string form
+constexpr auto kArgM = "m";
+constexpr auto kArgN = "n";
+constexpr auto kArgK = "k";
+constexpr auto kArgKL = "kl";
+constexpr auto kArgKU = "ku";
+constexpr auto kArgLayout = "layout";
+constexpr auto kArgATransp = "transA";
+constexpr auto kArgBTransp = "transB";
+constexpr auto kArgSide = "side";
+constexpr auto kArgTriangle = "triangle";
+constexpr auto kArgDiagonal = "diagonal";
+constexpr auto kArgXInc = "incx";
+constexpr auto kArgYInc = "incy";
+constexpr auto kArgXOffset = "offx";
+constexpr auto kArgYOffset = "offy";
+constexpr auto kArgALeadDim = "lda";
+constexpr auto kArgBLeadDim = "ldb";
+constexpr auto kArgCLeadDim = "ldc";
+constexpr auto kArgAOffset = "offa";
+constexpr auto kArgBOffset = "offb";
+constexpr auto kArgCOffset = "offc";
+constexpr auto kArgAPOffset = "offap";
+constexpr auto kArgDotOffset = "offdot";
+constexpr auto kArgNrm2Offset = "offnrm2";
+constexpr auto kArgAsumOffset = "offasum";
+constexpr auto kArgImaxOffset = "offimax";
+constexpr auto kArgAlpha = "alpha";
+constexpr auto kArgBeta = "beta";
+
+// The tuner-specific arguments in string form
+constexpr auto kArgFraction = "fraction";
+
+// The client-specific arguments in string form
+constexpr auto kArgCompareclblas = "clblas";
+constexpr auto kArgComparecblas = "cblas";
+constexpr auto kArgStepSize = "step";
+constexpr auto kArgNumSteps = "num_steps";
+constexpr auto kArgNumRuns = "runs";
+constexpr auto kArgWarmUp = "warm_up";
+
+// The test-specific arguments in string form
+constexpr auto kArgFullTest = "full_test";
+constexpr auto kArgVerbose = "verbose";
+
+// The common arguments in string form
+constexpr auto kArgPlatform = "platform";
+constexpr auto kArgDevice = "device";
+constexpr auto kArgPrecision = "precision";
+constexpr auto kArgHelp = "h";
+constexpr auto kArgQuiet = "q";
+constexpr auto kArgNoAbbreviations = "no_abbrv";
+
+// =================================================================================================
+
+// Returns a scalar with a default value
+template <typename T>
+T GetScalar();
+
+// Returns a scalar of value 1
+template <typename T>
+T ConstantOne();
+
+// =================================================================================================
+
+// Structure with all possible arguments for the tests, tuners and clients, each with a default
+template <typename T>
+struct Arguments {
+  // Routine-specific arguments
+  size_t m = 1;
+  size_t n = 1;
+  size_t k = 1;
+  size_t ku = 1;
+  size_t kl = 1;
+  Layout layout = Layout::kRowMajor;
+  Transpose a_transpose = Transpose::kNo;
+  Transpose b_transpose = Transpose::kNo;
+  Side side = Side::kLeft;
+  Triangle triangle = Triangle::kUpper;
+  Diagonal diagonal = Diagonal::kUnit;
+  size_t x_inc = 1;
+  size_t y_inc = 1;
+  size_t x_offset = 0;
+  size_t y_offset = 0;
+  size_t a_ld = 1;
+  size_t b_ld = 1;
+  size_t c_ld = 1;
+  size_t a_offset = 0;
+  size_t b_offset = 0;
+  size_t c_offset = 0;
+  size_t ap_offset = 0;
+  size_t dot_offset = 0;
+  size_t nrm2_offset = 0;
+  size_t asum_offset = 0;
+  size_t imax_offset = 0;
+  T alpha = ConstantOne<T>(); // both scalars default to a value of 1 (see 'ConstantOne')
+  T beta = ConstantOne<T>();
+  size_t x_size = 1; // first of the buffer sizes; presumably in elements - TODO confirm
+  size_t y_size = 1;
+  size_t a_size = 1;
+  size_t b_size = 1;
+  size_t c_size = 1;
+  size_t ap_size = 1;
+  size_t scalar_size = 1;
+  // Tuner-specific arguments
+  double fraction = 1.0;
+  // Client-specific arguments
+  int compare_clblas = 1; // integer flag; presumably non-zero enables the comparison - TODO confirm
+  int compare_cblas = 1;
+  size_t step = 1;
+  size_t num_steps = 0;
+  size_t num_runs = 10;
+  // Common arguments
+  size_t platform_id = 0;
+  size_t device_id = 0;
+  Precision precision = Precision::kSingle;
+  bool print_help = false;
+  bool silent = false;
+  bool no_abbrv = false;
+};
+
+// Structure containing all possible buffers for test clients (vectors, matrices, and a scalar)
+template <typename T>
+struct Buffers {
+  Buffer<T> x_vec;
+  Buffer<T> y_vec;
+  Buffer<T> a_mat;
+  Buffer<T> b_mat;
+  Buffer<T> c_mat;
+  Buffer<T> ap_mat; // presumably the packed-storage matrix (cf. 'offap') - TODO confirm
+  Buffer<T> scalar;
+};
+
+// =================================================================================================
+
+// Converts a value (e.g. an integer) to a string. This also covers special cases for CLBlast
+// data-types such as the Layout and Transpose data-types.
+template <typename T>
+std::string ToString(T value);
+
+// =================================================================================================
+
+// Helper for the function "GetArgument"
+template <typename T>
+T ConvertArgument(const char* value);
+
+// Variant of "ConvertArgument" with default values
+template <typename T>
+T ConvertArgument(const char* value, T default_value);
+
+// Basic argument parser, matching patterns in the form of "-option value" and "--option value"
+template <typename T>
+T GetArgument(const int argc, char **argv, std::string &help,
+              const std::string &option, const T default_value);
+
+// Returns the precision only
+Precision GetPrecision(const int argc, char *argv[],
+                       const Precision default_precision = Precision::kSingle);
+
+// As in "GetArgument", but now only checks whether an argument is given or not
+bool CheckArgument(const int argc, char *argv[], std::string &help, const std::string &option);
+
+// =================================================================================================
+
+// Returns a random number to be used as a seed
+unsigned int GetRandomSeed();
+
+// Test/example data lower and upper limit
+constexpr auto kTestDataLowerLimit = -2.0;
+constexpr auto kTestDataUpperLimit = 2.0;
+
+// Populates a vector with random data
+template <typename T>
+void PopulateVector(std::vector<T> &vector, const unsigned int seed);
+
+// =================================================================================================
+
+// Conversion between half and single-precision
+std::vector<float> HalfToFloatBuffer(const std::vector<half>& source);
+void FloatToHalfBuffer(std::vector<half>& result, const std::vector<float>& source);
+
+// As above, but now for OpenCL data-types instead of std::vectors
+Buffer<float> HalfToFloatBuffer(const Buffer<half>& source, cl_command_queue queue_raw);
+void FloatToHalfBuffer(Buffer<half>& result, const Buffer<float>& source, cl_command_queue queue_raw);
+
+// Converts a 'real' value into a 'real argument' value to be passed to a kernel. RealArg<T>::Type
+// equals T in general; half-precision is not supported as kernel argument and maps to float.
+template <typename T> struct RealArg { using Type = T; };
+template <> struct RealArg<half> { using Type = float; };
+template <typename T> typename RealArg<T>::Type GetRealArg(const T value);
+
+// =================================================================================================
+
+// Rounding functions
+size_t CeilDiv(const size_t x, const size_t y);
+size_t Ceil(const size_t x, const size_t y);
+
+// Returns whether or not 'a' is a multiple of 'b'
+bool IsMultiple(const size_t a, const size_t b);
+
+// =================================================================================================
+
+// Convert the precision enum into bytes, e.g. a double takes up 8 bytes
+size_t GetBytes(const Precision precision);
+
+// Convert the template argument into a precision value
+template <typename T>
+Precision PrecisionValue();
+
+// =================================================================================================
+
+// Returns false if this precision is not supported by the device
+template <typename T>
+bool PrecisionSupported(const Device &device);
+
+// =================================================================================================
+} // namespace clblast
+
+// CLBLAST_UTILITIES_H_
+#endif