23 files changed, 108 insertions, 99 deletions
diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
new file mode 100644
index 00000000..5fe1eb25
--- /dev/null
+++ b/.github/workflows/build_and_test.yml
@@ -0,0 +1,65 @@
+# Builds CLBlast with GCC and Clang on Ubuntu and with Clang on macOS, for every
+# pull request and for every push to master. The compiled executables and the
+# unit tests are only executed on the macOS runner (see the `if:` conditions).
+name: CLBlast build
+
+on:
+  pull_request: {}
+  push:
+    branches: ['master']
+
+jobs:
+
+  build_and_test:
+    strategy:
+      # Let every configuration finish, so one failing job does not hide the others
+      fail-fast: false
+      matrix:
+        config:
+          - {os: ubuntu-latest, c_compiler: gcc, cpp_compiler: g++}
+          - {os: ubuntu-latest, c_compiler: clang, cpp_compiler: clang++}
+          - {os: macos-latest, c_compiler: clang, cpp_compiler: clang++}
+    runs-on: ${{ matrix.config.os }}
+    steps:
+    - uses: actions/checkout@v4
+
+    - name: Install requirements for Ubuntu
+      if: ${{ matrix.config.os == 'ubuntu-latest' }}
+      run: |
+        sudo apt-get update
+        sudo apt-get install -yq cmake ninja-build ocl-icd-opencl-dev opencl-c-headers libopenblas-dev --no-install-recommends
+
+    - name: Install requirements for macOS
+      if: ${{ matrix.config.os == 'macos-latest' }}
+      run: brew install ninja
+
+    - name: Run CMake
+      env:
+        CC: ${{ matrix.config.c_compiler }}
+        CXX: ${{ matrix.config.cpp_compiler }}
+      run: cmake -S . -B build -G Ninja -DTESTS=ON -DCLIENTS=ON -DSAMPLES=ON
+
+    - name: Compile the code
+      run: cmake --build build
+
+    # NOTE(review): everything below runs on macOS only, presumably because the
+    # hosted Ubuntu runners expose no OpenCL device - confirm before enabling it there.
+    - name: Get the diagnostics info
+      if: ${{ matrix.config.os == 'macos-latest' }}
+      run: ./build/clblast_test_diagnostics
+
+    - name: Run an example client
+      if: ${{ matrix.config.os == 'macos-latest' }}
+      run: ./build/clblast_client_xgemm
+
+    - name: Run an example sample program
+      if: ${{ matrix.config.os == 'macos-latest' }}
+      run: ./build/clblast_sample_dgemv_c
+
+    - name: Run an example tuner
+      if: ${{ matrix.config.os == 'macos-latest' }}
+      run: ./build/clblast_tuner_xdot
+
+    - name: Run the unittests
+      if: ${{ matrix.config.os == 'macos-latest' }}
+      run: ctest --test-dir build --output-on-failure
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index dbdae42e..00000000
--- a/.travis.yml
+++ /dev/null
@@ -1,69 +0,0 @@
-language: cpp
-sudo: required
-dist: trusty
-
-addons:
-  apt:
-    sources:
-      # kubuntu-backports contains newer versions of cmake to install
-      - kubuntu-backports
-    packages:
-      - cmake
-      - ocl-icd-opencl-dev
-
-matrix:
-  include:
-    - os: linux
-      compiler: gcc
-    - os: linux
-      compiler: clang
-    - os: osx
-
-env:
-  global:
-    - CLBLAST_VERSION=1.5.3
-    - CLBLAST_ROOT=${TRAVIS_BUILD_DIR}/bin/clblast
-    - CLBLAST_INSTALL=${TRAVIS_BUILD_DIR}/bin/CLBlast-${CLBLAST_VERSION}
-    - CLBLAST_TAR=CLBlast-${CLBLAST_VERSION}-${TRAVIS_OS_NAME}-x64.tar.gz
-
-before_install:
-  - cmake --version;
-  - ${CC} --version;
-  - ${CXX} --version;
-
-before_script:
-  - mkdir -p ${CLBLAST_INSTALL}
-  - mkdir -p ${CLBLAST_ROOT}
-  - pushd ${CLBLAST_ROOT}
-  - cmake -DTESTS=ON -DCLIENTS=ON -DSAMPLES=ON -DCMAKE_INSTALL_PREFIX=${CLBLAST_INSTALL} ${TRAVIS_BUILD_DIR}
-
-script:
-  - make
-  - make install
-
-after_success:
-  - pushd ${TRAVIS_BUILD_DIR}/bin
-  - rm ${CLBLAST_INSTALL}/bin/clblast_client_*
-  - rm ${CLBLAST_INSTALL}/bin/clblast_test_*
-  - echo ${CLBLAST_TAR}
-  - tar -cvf ${CLBLAST_TAR} CLBlast-${CLBLAST_VERSION}
-  - cp ${CLBLAST_TAR} ${TRAVIS_BUILD_DIR}
-  - pushd ${TRAVIS_BUILD_DIR}
-  - ls -l
-
-branches:
-  only:
-    - master
-
-notifications:
-  email: false
-
-deploy:
-  provider: releases
-  api_key:
-    secure: oBnP56zfFTiON0v6nm6qiRevtTsojqaxV2E/+ahUP4iyZxZgn1zf9reGNEbB/s6wfHCwXpXKlCk3A0cEQzbfoZeQy3oMzyWHV/xgu+etOENe3z18oVEiVBe/WAd1/hMVmQvX65kHR+q12rce6K6rDm1mEIJC/udf5Dbdl2alVWgiL20Hrj/PSQAYZZuTmZLuMm7OBc1G2xhRmRo5FYgI2u1ZALUHDRov/yLQkoKwxAlzBhURoNTHW2wTAr3Pq01Fk2kfQFRmg7YFieu3cit/JGNzaDdgmT0U5pLRzhuPiD3qziNnC3rG7tnYV0jHQOLKH+AJ0csbNncG47JrUQrKDJGUs0fLBxHG4ErEdVc/s+l/ZTGBT6kOEjk5GLQviNuAzP51em+TATR6YJ4JdgnZEU3iwbyeY/lLPPWhOVDfUgLNVKHX7Sijf83Wp+cqspAdIcnT5lWMXUe7jciKQLC0B+jD6IQ/hCqF0/yX/H8Sa8jA+qSIrXWt/qSy1viKaQ3Sf8+rXyxG6dqYc0jUweQ248FOgUCtzmaZP48SoMBATN7JPCLzhGnY8IiMErGzc6jsevmoqB0MRqZhc2qsLEfTclxsMmfx2yVKt93G+zRMtQuYmf36MvDNbPaH+/tzE8pWxufSY0672qhL0sfvNO+FuCJ8Bsk4UwKV3lTeGjCwN5o=
-  file: ${CLBLAST_TAR}
-  skip_cleanup: true
-  on:
-    repo: CNugteren/CLBlast
-    tags: true
diff --git a/CMakeLists.txt b/CMakeLists.txt
index cde7ace0..169e6852 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -124,7 +124,7 @@ else()
     set(FLAGS "${FLAGS} -O2")
   endif()
   if(CMAKE_CXX_COMPILER_ID STREQUAL GNU)
-    set(FLAGS "${FLAGS} -Wall -Wno-comment -Wno-return-type -Wno-switch -Wno-missing-noreturn")
+    set(FLAGS "${FLAGS} -Wall -Wno-comment -Wno-return-type -Wno-switch -Wno-missing-noreturn -Wno-unused-function")
     if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.9.0)
       set(FLAGS "${FLAGS} -Wno-attributes -Wno-unused-variable")
     endif()
@@ -138,7 +138,7 @@ else()
     set(FLAGS "${FLAGS} -Wextra -Wno-c++98-compat -Wno-c++98-compat-pedantic -Wno-padded")
     set(FLAGS "${FLAGS} -Wno-missing-prototypes -Wno-float-equal -Wno-switch-enum -Wno-switch")
     set(FLAGS "${FLAGS} -Wno-exit-time-destructors -Wno-global-constructors -Wno-missing-noreturn")
-    set(FLAGS "${FLAGS} -Wno-deprecated-declarations")
+    set(FLAGS "${FLAGS} -Wno-deprecated-declarations -Wno-unused-function")
     if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 3.9.0)  # clang 4.0 or higher
         if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7.0.0)  # but not for AppleClang
           set(FLAGS "${FLAGS} -Wno-undefined-var-template")
diff --git a/README.md b/README.md
index cbecc606..bad6d9f7 100644
--- a/README.md
+++ b/README.md
@@ -5,8 +5,7 @@ CLBlast: The tuned OpenCL BLAS library
 | Platform | Build status |
 |-----|-----|
 | Windows | [![Build Status](https://ci.appveyor.com/api/projects/status/github/cnugteren/clblast?branch=master&svg=true)](https://ci.appveyor.com/project/CNugteren/clblast) |
-| Linux | [![Build Status](https://travis-ci.org/CNugteren/CLBlast.svg?branch=master)](https://travis-ci.org/CNugteren/CLBlast/branches) |
-| OS X | [![Build Status](https://travis-ci.org/CNugteren/CLBlast.svg?branch=master)](https://travis-ci.org/CNugteren/CLBlast/branches) |
+| Linux/macOS | ![Build Status](https://github.com/cnugteren/clblast/actions/workflows/build_and_test.yml/badge.svg?branch=master) |
 
 | Test machine (thanks to [ArrayFire](https://ci.arrayfire.org:8010/#/builders)) | Test status |
 |-----|-----|
diff --git a/include/clblast_half.h b/include/clblast_half.h
index b8de8537..cbea1723 100644
--- a/include/clblast_half.h
+++ b/include/clblast_half.h
@@ -18,11 +18,6 @@
 #ifndef CLBLAST_HALF_H_
 #define CLBLAST_HALF_H_
 
-// MSVC 2013 doesn't fully support C99
-#ifdef _MSC_VER
-    #define inline __inline
-#endif
-
 // =================================================================================================
 
 // The host data-type for half-precision floating-point (16-bit) is based on the `cl_half` OpenCL
@@ -40,7 +35,7 @@ typedef union ConversionBits_ {
 // Converts a IEEE-compliant single-precision value to half-precision floating-point. This function
 // applies simple truncation (round toward zero, but with overflows set to infinity) as rounding
 // mode.
-inline half FloatToHalf(const float value) {
+static half FloatToHalf(const float value) {
   static const unsigned short base_table[512] = { 
     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
@@ -101,7 +96,7 @@ inline half FloatToHalf(const float value) {
 }
 
 // Converts a half-precision value to IEEE-compliant single-precision floating-point
-inline float HalfToFloat(const half value) {
+static float HalfToFloat(const half value) {
   static const unsigned int mantissa_table[2048] = { 
     0x00000000, 0x33800000, 0x34000000, 0x34400000, 0x34800000, 0x34A00000, 0x34C00000, 0x34E00000, 0x35000000, 0x35100000, 0x35200000, 0x35300000, 0x35400000, 0x35500000, 0x35600000, 0x35700000,
     0x35800000, 0x35880000, 0x35900000, 0x35980000, 0x35A00000, 0x35A80000, 0x35B00000, 0x35B80000, 0x35C00000, 0x35C80000, 0x35D00000, 0x35D80000, 0x35E00000, 0x35E80000, 0x35F00000, 0x35F80000,
diff --git a/samples/cache.c b/samples/cache.c
index 980c7cf3..2a48c321 100644
--- a/samples/cache.c
+++ b/samples/cache.c
@@ -20,6 +20,7 @@
 #include <string.h>
 #include <time.h>
 
+#define CL_TARGET_OPENCL_VERSION 120
 #define CL_USE_DEPRECATED_OPENCL_1_2_APIS // to disable deprecation warnings
 
 // Includes the CLBlast library (C interface)
diff --git a/samples/dgemv.c b/samples/dgemv.c
index 975cb7ac..b579b25a 100644
--- a/samples/dgemv.c
+++ b/samples/dgemv.c
@@ -19,6 +19,7 @@
 #include <stdio.h>
 #include <string.h>
 
+#define CL_TARGET_OPENCL_VERSION 120
 #define CL_USE_DEPRECATED_OPENCL_1_2_APIS // to disable deprecation warnings
 
 // Includes the CLBlast library (C interface)
diff --git a/samples/dtrsm.cpp b/samples/dtrsm.cpp
index c558d1f5..e207e5d3 100644
--- a/samples/dtrsm.cpp
+++ b/samples/dtrsm.cpp
@@ -24,8 +24,9 @@
 
 // Includes the C++ OpenCL API. If not yet available, it can be found here:
 // https://raw.githubusercontent.com/KhronosGroup/OpenCL-CLHPP/main/include/CL/opencl.hpp
-#define CL_HPP_TARGET_OPENCL_VERSION 210
-#define CL_TARGET_OPENCL_VERSION 210
+#define CL_HPP_TARGET_OPENCL_VERSION 120
+#define CL_HPP_MINIMUM_OPENCL_VERSION 120
+#define CL_TARGET_OPENCL_VERSION 120
 #include "opencl.hpp"
 
 // Includes the CLBlast library
diff --git a/samples/haxpy.c b/samples/haxpy.c
index 4f2bb400..ad3eabae 100644
--- a/samples/haxpy.c
+++ b/samples/haxpy.c
@@ -18,6 +18,7 @@
 #include <stdio.h>
 #include <string.h>
 
+#define CL_TARGET_OPENCL_VERSION 120
 #define CL_USE_DEPRECATED_OPENCL_1_2_APIS // to disable deprecation warnings
 
 // Includes the CLBlast library (C interface)
diff --git a/samples/samax.c b/samples/samax.c
index 36e78846..27a8f133 100644
--- a/samples/samax.c
+++ b/samples/samax.c
@@ -19,7 +19,7 @@
 #include <stdio.h>
 #include <string.h>
 
-#define CL_TARGET_OPENCL_VERSION 110
+#define CL_TARGET_OPENCL_VERSION 120
 #define CL_USE_DEPRECATED_OPENCL_1_2_APIS // to disable deprecation warnings
 
 // Includes the CLBlast library (C interface)
@@ -84,7 +84,7 @@ int main(void) {
   clEnqueueReadBuffer(queue, device_output, CL_TRUE, 0, 1*sizeof(unsigned int), host_output, 0, NULL, NULL);
 
   // Example completed. See "clblast_c.h" for status codes (0 -> success).
-  printf("Completed iSAMAX with status %d: array of %d values with staircases from 0..9 repeated, max at index %zu with value %.0lf\n",
+  printf("Completed iSAMAX with status %d: array of %zu values with staircases from 0..9 repeated, max at index %u with value %.0lf\n",
          status, n, host_output[0], host_input[host_output[0]]);
 
   // Clean-up
diff --git a/samples/sasum.c b/samples/sasum.c
index 78377336..36efef4c 100644
--- a/samples/sasum.c
+++ b/samples/sasum.c
@@ -19,6 +19,7 @@
 #include <stdio.h>
 #include <string.h>
 
+#define CL_TARGET_OPENCL_VERSION 120
 #define CL_USE_DEPRECATED_OPENCL_1_2_APIS // to disable deprecation warnings
 
 // Includes the CLBlast library (C interface)
diff --git a/samples/sgemm.c b/samples/sgemm.c
index 92f3057d..df105f17 100644
--- a/samples/sgemm.c
+++ b/samples/sgemm.c
@@ -19,6 +19,7 @@
 #include <stdio.h>
 #include <string.h>
 
+#define CL_TARGET_OPENCL_VERSION 120
 #define CL_USE_DEPRECATED_OPENCL_1_2_APIS // to disable deprecation warnings
 
 // Includes the CLBlast library (C interface)
diff --git a/samples/sgemm.cpp b/samples/sgemm.cpp
index 534ffa78..f2acfff6 100644
--- a/samples/sgemm.cpp
+++ b/samples/sgemm.cpp
@@ -25,8 +25,9 @@
 
 // Includes the C++ OpenCL API. If not yet available, it can be found here:
 // https://raw.githubusercontent.com/KhronosGroup/OpenCL-CLHPP/main/include/CL/opencl.hpp
-#define CL_HPP_TARGET_OPENCL_VERSION 210
-#define CL_TARGET_OPENCL_VERSION 210
+#define CL_HPP_TARGET_OPENCL_VERSION 120
+#define CL_HPP_MINIMUM_OPENCL_VERSION 120
+#define CL_TARGET_OPENCL_VERSION 120
 #include "opencl.hpp"
 
 // Includes the CLBlast library
diff --git a/samples/sgemm_batched.cpp b/samples/sgemm_batched.cpp
index a839d305..d17d31c3 100644
--- a/samples/sgemm_batched.cpp
+++ b/samples/sgemm_batched.cpp
@@ -25,8 +25,9 @@
 
 // Includes the C++ OpenCL API. If not yet available, it can be found here:
 // https://raw.githubusercontent.com/KhronosGroup/OpenCL-CLHPP/main/include/CL/opencl.hpp
-#define CL_HPP_TARGET_OPENCL_VERSION 210
-#define CL_TARGET_OPENCL_VERSION 210
+#define CL_HPP_TARGET_OPENCL_VERSION 120
+#define CL_HPP_MINIMUM_OPENCL_VERSION 120
+#define CL_TARGET_OPENCL_VERSION 120
 #include "opencl.hpp"
 
 // Includes the CLBlast library
diff --git a/samples/tuning_api.cpp b/samples/tuning_api.cpp
index 7d7294ad..a9968101 100644
--- a/samples/tuning_api.cpp
+++ b/samples/tuning_api.cpp
@@ -21,8 +21,9 @@
 
 // Includes the C++ OpenCL API. If not yet available, it can be found here:
 // https://raw.githubusercontent.com/KhronosGroup/OpenCL-CLHPP/main/include/CL/opencl.hpp
-#define CL_HPP_TARGET_OPENCL_VERSION 210
-#define CL_TARGET_OPENCL_VERSION 210
+#define CL_HPP_TARGET_OPENCL_VERSION 120
+#define CL_HPP_MINIMUM_OPENCL_VERSION 120
+#define CL_TARGET_OPENCL_VERSION 120
 #include "opencl.hpp"
 
 // Includes the CLBlast library
@@ -66,7 +67,7 @@ int main() {
 
   // Tuning completed. See "clblast.h" for status codes (0 -> success).
   printf("Completed TuneCopy with status %d (0 == OK), found parameters:\n", static_cast<int>(status));
-  for (const auto parameter: parameters) {
+  for (const auto &parameter: parameters) {
     printf(">  %s = %zu\n", parameter.first.c_str(), parameter.second);
   }
 
diff --git a/test/correctness/testblas.hpp b/test/correctness/testblas.hpp
index b2dc6e7a..bfefadc5 100644
--- a/test/correctness/testblas.hpp
+++ b/test/correctness/testblas.hpp
@@ -157,6 +157,15 @@ template <typename T, typename U> const std::vector<Triangle> TestBlas<T,U>::kTr
 template <typename T, typename U> const std::vector<Side> TestBlas<T,U>::kSides = {Side::kLeft, Side::kRight};
 template <typename T, typename U> const std::vector<Diagonal> TestBlas<T,U>::kDiagonals = {Diagonal::kUnit, Diagonal::kNonUnit};
 
+// The transpose configurations to test with: template parameter dependent, see .cpp file for implementation
+template <> const std::vector<Transpose> TestBlas<half,half>::kTransposes;
+template <> const std::vector<Transpose> TestBlas<float,float>::kTransposes;
+template <> const std::vector<Transpose> TestBlas<double,double>::kTransposes;
+template <> const std::vector<Transpose> TestBlas<float2,float2>::kTransposes;
+template <> const std::vector<Transpose> TestBlas<double2,double2>::kTransposes;
+template <> const std::vector<Transpose> TestBlas<float2,float>::kTransposes;
+template <> const std::vector<Transpose> TestBlas<double2,double>::kTransposes;
+
 // =================================================================================================
 
 // Bogus reference function, in case a comparison library is not available
diff --git a/test/performance/client.cpp b/test/performance/client.cpp
index e6930203..e0c99fa9 100644
--- a/test/performance/client.cpp
+++ b/test/performance/client.cpp
@@ -279,7 +279,7 @@ void Client<T,U>::PerformanceTest(Arguments<U> &args, const SetMetric set_sizes)
       TimeResult time_cublas;
       try {
         time_cublas = TimedExecution(args.num_runs, args, buffers_cuda, queue, run_reference3_, "cuBLAS");
-      } catch (std::runtime_error e) { }
+      } catch (const std::runtime_error &) { }  // swallowed: if TimedExecution throws, time_cublas is never assigned
       CUDAToHost(args, buffers_cuda, buffers_host, buffers_out_);
       HostToDevice(args, buffers, buffers_host, queue, buffers_out_);
       timings.push_back(std::pair<std::string, TimeResult>("cuBLAS", time_cublas));
diff --git a/test/routines/levelx/xcol2im.hpp b/test/routines/levelx/xcol2im.hpp
index c740e4c7..cdac3493 100644
--- a/test/routines/levelx/xcol2im.hpp
+++ b/test/routines/levelx/xcol2im.hpp
@@ -201,7 +201,8 @@ StatusCode RunReference<half>(const Arguments<half> &args, BuffersHost<half> &bu
   auto a_buffer2 = HalfToFloatBuffer(buffers_host.a_mat);
   auto b_buffer2 = HalfToFloatBuffer(buffers_host.b_mat);
   auto dummy = std::vector<float>(0);
-  auto buffers2 = BuffersHost<float>{dummy, dummy, a_buffer2, b_buffer2, dummy, dummy, dummy};
+  auto dummy_uint = std::vector<unsigned int>(0);
+  auto buffers2 = BuffersHost<float>{dummy, dummy, a_buffer2, b_buffer2, dummy, dummy, dummy, dummy_uint};
   auto args2 = Arguments<float>();
   args2.a_size = args.a_size; args2.b_size = args.b_size;
   args2.kernel_mode = args.kernel_mode;
diff --git a/test/routines/levelx/xconvgemm.hpp b/test/routines/levelx/xconvgemm.hpp
index 786bb733..9085d927 100644
--- a/test/routines/levelx/xconvgemm.hpp
+++ b/test/routines/levelx/xconvgemm.hpp
@@ -230,7 +230,8 @@ StatusCode RunReference<half>(const Arguments<half> &args, BuffersHost<half> &bu
   auto b_buffer2 = HalfToFloatBuffer(buffers_host.b_mat);
   auto c_buffer2 = HalfToFloatBuffer(buffers_host.c_mat);
   auto dummy = std::vector<float>(0);
-  auto buffers2 = BuffersHost<float>{dummy, dummy, a_buffer2, b_buffer2, c_buffer2, dummy, dummy};
+  auto dummy_uint = std::vector<unsigned int>(0);
+  auto buffers2 = BuffersHost<float>{dummy, dummy, a_buffer2, b_buffer2, c_buffer2, dummy, dummy, dummy_uint};
   auto args2 = Arguments<float>();
   args2.a_size = args.a_size; args2.b_size = args.b_size; args2.c_size = args.c_size;
   args2.kernel_mode = args.kernel_mode;
diff --git a/test/routines/levelx/xhad.hpp b/test/routines/levelx/xhad.hpp
index 3e40de87..49629452 100644
--- a/test/routines/levelx/xhad.hpp
+++ b/test/routines/levelx/xhad.hpp
@@ -39,7 +39,8 @@ StatusCode RunReference<half>(const Arguments<half> &args, BuffersHost<half> &bu
   auto y_buffer2 = HalfToFloatBuffer(buffers_host.y_vec);
   auto c_buffer2 = HalfToFloatBuffer(buffers_host.c_mat);
   auto dummy = std::vector<float>(0);
-  auto buffers2 = BuffersHost<float>{x_buffer2, y_buffer2, dummy, dummy, c_buffer2, dummy, dummy};
+  auto dummy_uint = std::vector<unsigned int>(0);
+  auto buffers2 = BuffersHost<float>{x_buffer2, y_buffer2, dummy, dummy, c_buffer2, dummy, dummy, dummy_uint};
   auto args2 = Arguments<float>();
   args2.x_size = args.x_size; args2.y_size = args.y_size; args2.c_size = args.c_size;
   args2.x_inc = args.x_inc; args2.y_inc = args.y_inc; args2.n = args.n;
@@ -152,7 +153,7 @@ public:
   // Describes how to compute the indices of the result buffer
   static size_t ResultID1(const Arguments<T> &args) { return args.n; }
   static size_t ResultID2(const Arguments<T> &) { return 1; } // N/A for this routine
-  static size_t GetResultIndex(const Arguments<T> &args, const size_t id1, const size_t) {
+  static size_t GetResultIndex(const Arguments<T> &, const size_t id1, const size_t) {
     return id1; // * args.z_inc + args.z_offset;
   }
 
diff --git a/test/routines/levelx/xim2col.hpp b/test/routines/levelx/xim2col.hpp
index 2a5ebf8e..ab3b6a0f 100644
--- a/test/routines/levelx/xim2col.hpp
+++ b/test/routines/levelx/xim2col.hpp
@@ -200,7 +200,8 @@ StatusCode RunReference<half>(const Arguments<half> &args, BuffersHost<half> &bu
   auto a_buffer2 = HalfToFloatBuffer(buffers_host.a_mat);
   auto b_buffer2 = HalfToFloatBuffer(buffers_host.b_mat);
   auto dummy = std::vector<float>(0);
-  auto buffers2 = BuffersHost<float>{dummy, dummy, a_buffer2, b_buffer2, dummy, dummy, dummy};
+  auto dummy_uint = std::vector<unsigned int>(0);
+  auto buffers2 = BuffersHost<float>{dummy, dummy, a_buffer2, b_buffer2, dummy, dummy, dummy, dummy_uint};
   auto args2 = Arguments<float>();
   args2.a_size = args.a_size; args2.b_size = args.b_size;
   args2.kernel_mode = args.kernel_mode;
diff --git a/test/routines/levelx/xinvert.hpp b/test/routines/levelx/xinvert.hpp
index 126856ac..54905de1 100644
--- a/test/routines/levelx/xinvert.hpp
+++ b/test/routines/levelx/xinvert.hpp
@@ -108,7 +108,8 @@ StatusCode RunReference<half>(const Arguments<half> &args, BuffersHost<half> &bu
   auto a_buffer2 = HalfToFloatBuffer(buffers_host.a_mat);
   auto b_buffer2 = HalfToFloatBuffer(buffers_host.b_mat);
   auto dummy = std::vector<float>(0);
-  auto buffers2 = BuffersHost<float>{dummy, dummy, a_buffer2, b_buffer2, dummy, dummy, dummy};
+  auto dummy_uint = std::vector<unsigned int>(0);
+  auto buffers2 = BuffersHost<float>{dummy, dummy, a_buffer2, b_buffer2, dummy, dummy, dummy, dummy_uint};
   auto args2 = Arguments<float>();
   args2.a_size = args.a_size; args2.b_size = args.b_size;
   args2.a_ld = args.a_ld; args2.m = args.m; args2.n = args.n;
diff --git a/test/routines/levelx/xomatcopy.hpp b/test/routines/levelx/xomatcopy.hpp
index 4a93b29d..a4d98e79 100644
--- a/test/routines/levelx/xomatcopy.hpp
+++ b/test/routines/levelx/xomatcopy.hpp
@@ -59,7 +59,8 @@ StatusCode RunReference<half>(const Arguments<half> &args, BuffersHost<half> &bu
   auto a_buffer2 = HalfToFloatBuffer(buffers_host.a_mat);
   auto b_buffer2 = HalfToFloatBuffer(buffers_host.b_mat);
   auto dummy = std::vector<float>(0);
-  auto buffers2 = BuffersHost<float>{dummy, dummy, a_buffer2, b_buffer2, dummy, dummy, dummy};
+  auto dummy_uint = std::vector<unsigned int>(0);
+  auto buffers2 = BuffersHost<float>{dummy, dummy, a_buffer2, b_buffer2, dummy, dummy, dummy, dummy_uint};
   auto args2 = Arguments<float>();
   args2.a_size = args.a_size; args2.b_size = args.b_size;
   args2.a_ld = args.a_ld; args2.b_ld = args.b_ld; args2.m = args.m; args2.n = args.n;