summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCedric Nugteren <web@cedricnugteren.nl>2017-04-16 19:41:14 +0200
committerCedric Nugteren <web@cedricnugteren.nl>2017-04-16 19:41:14 +0200
commit2673f5051820db82ebb857d88c2f36f7cacbed7d (patch)
treef3323af174bde2793b3c4692f3404d2a18c5eadb
parent063ef729e123aa2cebc7f67c73f99f3e15606fe2 (diff)
parentb20c518f9fd05a69957c2018e72c6a648f5cdb7d (diff)
Merge branch 'development' into benchmarking
-rw-r--r--CHANGELOG1
-rw-r--r--CMakeLists.txt21
-rw-r--r--README.md2
-rw-r--r--cmake/Modules/FindcuBLAS.cmake82
-rwxr-xr-xscripts/generator/generator.py11
-rw-r--r--scripts/generator/generator/convert.py13
-rw-r--r--scripts/generator/generator/cpp.py64
-rw-r--r--scripts/generator/generator/datatype.py11
-rw-r--r--scripts/generator/generator/routine.py110
-rw-r--r--src/utilities/utilities.hpp5
-rw-r--r--test/correctness/misc/override_parameters.cpp6
-rw-r--r--test/correctness/routines/level1/xamax.cpp8
-rw-r--r--test/correctness/routines/level1/xasum.cpp8
-rw-r--r--test/correctness/routines/level1/xaxpy.cpp8
-rw-r--r--test/correctness/routines/level1/xcopy.cpp8
-rw-r--r--test/correctness/routines/level1/xdot.cpp4
-rw-r--r--test/correctness/routines/level1/xdotc.cpp8
-rw-r--r--test/correctness/routines/level1/xdotu.cpp8
-rw-r--r--test/correctness/routines/level1/xnrm2.cpp8
-rw-r--r--test/correctness/routines/level1/xrot.cpp4
-rw-r--r--test/correctness/routines/level1/xrotg.cpp4
-rw-r--r--test/correctness/routines/level1/xrotm.cpp4
-rw-r--r--test/correctness/routines/level1/xrotmg.cpp4
-rw-r--r--test/correctness/routines/level1/xscal.cpp8
-rw-r--r--test/correctness/routines/level1/xswap.cpp8
-rw-r--r--test/correctness/routines/level2/xgbmv.cpp8
-rw-r--r--test/correctness/routines/level2/xgemv.cpp8
-rw-r--r--test/correctness/routines/level2/xger.cpp4
-rw-r--r--test/correctness/routines/level2/xgerc.cpp8
-rw-r--r--test/correctness/routines/level2/xgeru.cpp8
-rw-r--r--test/correctness/routines/level2/xhbmv.cpp8
-rw-r--r--test/correctness/routines/level2/xhemv.cpp8
-rw-r--r--test/correctness/routines/level2/xher.cpp8
-rw-r--r--test/correctness/routines/level2/xher2.cpp8
-rw-r--r--test/correctness/routines/level2/xhpmv.cpp8
-rw-r--r--test/correctness/routines/level2/xhpr.cpp8
-rw-r--r--test/correctness/routines/level2/xhpr2.cpp8
-rw-r--r--test/correctness/routines/level2/xsbmv.cpp4
-rw-r--r--test/correctness/routines/level2/xspmv.cpp4
-rw-r--r--test/correctness/routines/level2/xspr.cpp4
-rw-r--r--test/correctness/routines/level2/xspr2.cpp4
-rw-r--r--test/correctness/routines/level2/xsymv.cpp4
-rw-r--r--test/correctness/routines/level2/xsyr.cpp4
-rw-r--r--test/correctness/routines/level2/xsyr2.cpp4
-rw-r--r--test/correctness/routines/level2/xtbmv.cpp8
-rw-r--r--test/correctness/routines/level2/xtbsv.cpp8
-rw-r--r--test/correctness/routines/level2/xtpmv.cpp8
-rw-r--r--test/correctness/routines/level2/xtpsv.cpp8
-rw-r--r--test/correctness/routines/level2/xtrmv.cpp8
-rw-r--r--test/correctness/routines/level2/xtrsv.cpp8
-rw-r--r--test/correctness/routines/level3/xgemm.cpp8
-rw-r--r--test/correctness/routines/level3/xhemm.cpp8
-rw-r--r--test/correctness/routines/level3/xher2k.cpp8
-rw-r--r--test/correctness/routines/level3/xherk.cpp8
-rw-r--r--test/correctness/routines/level3/xsymm.cpp8
-rw-r--r--test/correctness/routines/level3/xsyr2k.cpp8
-rw-r--r--test/correctness/routines/level3/xsyrk.cpp8
-rw-r--r--test/correctness/routines/level3/xtrmm.cpp8
-rw-r--r--test/correctness/routines/level3/xtrsm.cpp8
-rw-r--r--test/correctness/routines/levelx/xaxpybatched.cpp8
-rw-r--r--test/correctness/routines/levelx/xgemmbatched.cpp8
-rw-r--r--test/correctness/routines/levelx/xomatcopy.cpp8
-rw-r--r--test/correctness/tester.cpp44
-rw-r--r--test/correctness/tester.hpp1
-rw-r--r--test/performance/client.cpp38
-rw-r--r--test/performance/client.hpp12
-rw-r--r--test/performance/routines/level1/xamax.cpp8
-rw-r--r--test/performance/routines/level1/xasum.cpp8
-rw-r--r--test/performance/routines/level1/xaxpy.cpp8
-rw-r--r--test/performance/routines/level1/xcopy.cpp8
-rw-r--r--test/performance/routines/level1/xdot.cpp4
-rw-r--r--test/performance/routines/level1/xdotc.cpp8
-rw-r--r--test/performance/routines/level1/xdotu.cpp8
-rw-r--r--test/performance/routines/level1/xnrm2.cpp8
-rw-r--r--test/performance/routines/level1/xrot.cpp4
-rw-r--r--test/performance/routines/level1/xrotg.cpp4
-rw-r--r--test/performance/routines/level1/xrotm.cpp4
-rw-r--r--test/performance/routines/level1/xrotmg.cpp4
-rw-r--r--test/performance/routines/level1/xscal.cpp8
-rw-r--r--test/performance/routines/level1/xswap.cpp8
-rw-r--r--test/performance/routines/level2/xgbmv.cpp8
-rw-r--r--test/performance/routines/level2/xgemv.cpp8
-rw-r--r--test/performance/routines/level2/xger.cpp4
-rw-r--r--test/performance/routines/level2/xgerc.cpp8
-rw-r--r--test/performance/routines/level2/xgeru.cpp8
-rw-r--r--test/performance/routines/level2/xhbmv.cpp8
-rw-r--r--test/performance/routines/level2/xhemv.cpp8
-rw-r--r--test/performance/routines/level2/xher.cpp8
-rw-r--r--test/performance/routines/level2/xher2.cpp8
-rw-r--r--test/performance/routines/level2/xhpmv.cpp8
-rw-r--r--test/performance/routines/level2/xhpr.cpp8
-rw-r--r--test/performance/routines/level2/xhpr2.cpp8
-rw-r--r--test/performance/routines/level2/xsbmv.cpp4
-rw-r--r--test/performance/routines/level2/xspmv.cpp4
-rw-r--r--test/performance/routines/level2/xspr.cpp4
-rw-r--r--test/performance/routines/level2/xspr2.cpp4
-rw-r--r--test/performance/routines/level2/xsymv.cpp4
-rw-r--r--test/performance/routines/level2/xsyr.cpp4
-rw-r--r--test/performance/routines/level2/xsyr2.cpp4
-rw-r--r--test/performance/routines/level2/xtbmv.cpp8
-rw-r--r--test/performance/routines/level2/xtbsv.cpp8
-rw-r--r--test/performance/routines/level2/xtpmv.cpp8
-rw-r--r--test/performance/routines/level2/xtpsv.cpp8
-rw-r--r--test/performance/routines/level2/xtrmv.cpp8
-rw-r--r--test/performance/routines/level2/xtrsv.cpp8
-rw-r--r--test/performance/routines/level3/xgemm.cpp8
-rw-r--r--test/performance/routines/level3/xhemm.cpp8
-rw-r--r--test/performance/routines/level3/xher2k.cpp8
-rw-r--r--test/performance/routines/level3/xherk.cpp8
-rw-r--r--test/performance/routines/level3/xsymm.cpp8
-rw-r--r--test/performance/routines/level3/xsyr2k.cpp8
-rw-r--r--test/performance/routines/level3/xsyrk.cpp8
-rw-r--r--test/performance/routines/level3/xtrmm.cpp8
-rw-r--r--test/performance/routines/level3/xtrsm.cpp8
-rw-r--r--test/performance/routines/levelx/xaxpybatched.cpp8
-rw-r--r--test/performance/routines/levelx/xgemmbatched.cpp8
-rw-r--r--test/performance/routines/levelx/xomatcopy.cpp8
-rw-r--r--test/routines/common.hpp36
-rw-r--r--test/routines/level1/xamax.hpp20
-rw-r--r--test/routines/level1/xasum.hpp20
-rw-r--r--test/routines/level1/xaxpy.hpp20
-rw-r--r--test/routines/level1/xcopy.hpp20
-rw-r--r--test/routines/level1/xdot.hpp21
-rw-r--r--test/routines/level1/xdotc.hpp21
-rw-r--r--test/routines/level1/xdotu.hpp21
-rw-r--r--test/routines/level1/xnrm2.hpp20
-rw-r--r--test/routines/level1/xscal.hpp19
-rw-r--r--test/routines/level1/xswap.hpp20
-rw-r--r--test/routines/level2/xgbmv.hpp23
-rw-r--r--test/routines/level2/xgemv.hpp23
-rw-r--r--test/routines/level2/xger.hpp22
-rw-r--r--test/routines/level2/xgerc.hpp22
-rw-r--r--test/routines/level2/xgeru.hpp22
-rw-r--r--test/routines/level2/xhbmv.hpp23
-rw-r--r--test/routines/level2/xhemv.hpp23
-rw-r--r--test/routines/level2/xher.hpp22
-rw-r--r--test/routines/level2/xher2.hpp23
-rw-r--r--test/routines/level2/xhpmv.hpp23
-rw-r--r--test/routines/level2/xhpr.hpp22
-rw-r--r--test/routines/level2/xhpr2.hpp23
-rw-r--r--test/routines/level2/xsbmv.hpp23
-rw-r--r--test/routines/level2/xspmv.hpp23
-rw-r--r--test/routines/level2/xspr.hpp22
-rw-r--r--test/routines/level2/xspr2.hpp23
-rw-r--r--test/routines/level2/xsymv.hpp23
-rw-r--r--test/routines/level2/xsyr.hpp22
-rw-r--r--test/routines/level2/xsyr2.hpp23
-rw-r--r--test/routines/level2/xtbmv.hpp24
-rw-r--r--test/routines/level2/xtpmv.hpp24
-rw-r--r--test/routines/level2/xtrmv.hpp24
-rw-r--r--test/routines/level2/xtrsv.hpp24
-rw-r--r--test/routines/level3/xgemm.hpp24
-rw-r--r--test/routines/level3/xhemm.hpp24
-rw-r--r--test/routines/level3/xher2k.hpp25
-rw-r--r--test/routines/level3/xherk.hpp23
-rw-r--r--test/routines/level3/xsymm.hpp24
-rw-r--r--test/routines/level3/xsyr2k.hpp24
-rw-r--r--test/routines/level3/xsyrk.hpp23
-rw-r--r--test/routines/level3/xtrmm.hpp25
-rw-r--r--test/routines/level3/xtrsm.hpp26
-rw-r--r--test/routines/levelx/xaxpybatched.hpp25
-rw-r--r--test/routines/levelx/xgemmbatched.hpp27
-rw-r--r--test/routines/levelx/xinvert.hpp8
-rw-r--r--test/routines/levelx/xomatcopy.hpp6
-rw-r--r--test/wrapper_cblas.hpp268
-rw-r--r--test/wrapper_cublas.hpp2548
-rw-r--r--test/wrapper_cuda.hpp149
167 files changed, 4003 insertions, 1143 deletions
diff --git a/CHANGELOG b/CHANGELOG
index 0b4e9951..6643cc32 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -8,6 +8,7 @@ Development version (next release)
- Fixed bugs in the half-precision routines HTBMV/HTPMV/HTRMV/HSYR2K/HTRMM
- Tests now also exit with an error code when OpenCL errors or compilation errors occur
- Tests now also check for the L2 error in case of half-precision
+- Clients can now test against cuBLAS on NVIDIA systems for performance comparisons (-DCUBLAS=ON)
- Replaced the R graph scripts with Python/Matplotlib scripts
- Various minor fixes and enhancements
- Added tuned parameters for various devices (see README)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 62cf00cc..b26de79a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -28,6 +28,7 @@ option(TUNERS "Enable compilation of the tuners" OFF)
option(CLIENTS "Enable compilation of the clients to test and compare performance" OFF)
option(TESTS "Enable compilation of the correctness tests" OFF)
option(NETLIB "Enable compilation of the CBLAS Netlib API" OFF)
+option(CUBLAS "Enables performance comparison against cuBLAS on NVIDIA GPUs" OFF)
# Compile in verbose mode with additional diagnostic messages
option(VERBOSE "Compile in verbose mode for additional diagnostic messages" OFF)
@@ -129,11 +130,14 @@ if(TUNERS)
endif()
endif()
-# Locates the reference BLAS libraries in case the tests need to be compiled. The "FindclBLAS.cmake"
-# and "FindCBLAS.cmake" are included.
+# Locates the reference BLAS libraries in case the tests need to be compiled. The "FindclBLAS.cmake",
+# "FindCBLAS.cmake" and "FindcuBLAS.cmake" are included.
if(CLIENTS OR TESTS)
find_package(clBLAS)
find_package(CBLAS)
+ if(CUBLAS)
+ find_package(cuBLAS)
+ endif()
if(NOT CLBLAS_FOUND AND NOT CBLAS_FOUND)
if(TESTS)
message(STATUS "Could NOT find clBLAS nor a CPU BLAS, disabling the compilation of the tests")
@@ -320,13 +324,22 @@ if(CLIENTS OR TESTS)
add_definitions(" -DCLBLAST_REF_CBLAS")
endif()
endif()
+ if(CUBLAS_FOUND)
+ set(REF_INCLUDES ${REF_INCLUDES} ${CUDA_INCLUDE_DIRS})
+ set(REF_LIBRARIES ${REF_LIBRARIES} ${CUDA_LIBRARIES} ${CUBLAS_LIBRARIES})
+ if(MSVC)
+ add_definitions(" /DCLBLAST_REF_CUBLAS")
+ else()
+ add_definitions(" -DCLBLAST_REF_CUBLAS")
+ endif()
+ endif()
endif()
# ==================================================================================================
# Section for the performance tests (i.e. the client). These compare against optionally a reference
-# library, either clBLAS or a CPU BLAS.
+# library, either clBLAS, a CPU BLAS, or CUDA's cuBLAS.
if(CLIENTS)
# Visual Studio requires the sources of non-exported objects/libraries
@@ -372,7 +385,7 @@ endif()
# ==================================================================================================
# Section for the correctness tests. Note that these tests require the presence of clBLAS and/or a
-# CPU BLAS library to act as a reference.
+# CPU BLAS library, and/or cuBLAS to act as a reference.
if(TESTS)
enable_testing()
diff --git a/README.md b/README.md
index 3109b4bf..835f5eea 100644
--- a/README.md
+++ b/README.md
@@ -199,7 +199,7 @@ All tests can be run directly together in one go through the `make alltests` tar
Compiling the performance tests/clients (optional)
-------------
-To test the performance of CLBlast and compare optionally against [clBLAS](http://github.com/clMathLibraries/clBLAS) or a CPU BLAS library (see above for requirements), compile with the clients enabled by specifying `-DCLIENTS=ON`, for example as follows:
+To test the performance of CLBlast and compare optionally against [clBLAS](http://github.com/clMathLibraries/clBLAS), cuBLAS (if testing on an NVIDIA GPU and `-DCUBLAS=ON` is set), or a CPU BLAS library (see above for requirements), compile with the clients enabled by specifying `-DCLIENTS=ON`, for example as follows:
cmake -DCLIENTS=ON ..
diff --git a/cmake/Modules/FindcuBLAS.cmake b/cmake/Modules/FindcuBLAS.cmake
new file mode 100644
index 00000000..e470289b
--- /dev/null
+++ b/cmake/Modules/FindcuBLAS.cmake
@@ -0,0 +1,82 @@
+
+# ==================================================================================================
+# This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+# project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+# width of 100 characters per line.
+#
+# Author(s):
+# Cedric Nugteren <www.cedricnugteren.nl>
+#
+# ==================================================================================================
+#
+# Defines the following variables:
+# CUBLAS_FOUND Boolean holding whether or not the cuBLAS library was found
+# CUBLAS_INCLUDE_DIRS The CUDA and cuBLAS include directory
+# CUDA_LIBRARIES The CUDA library
+# CUBLAS_LIBRARIES The cuBLAS library
+#
+# In case CUDA is not installed in the default directory, set the CUDA_ROOT variable to point to
+# the root of the CUDA toolkit, such that 'cublas_v2.h' can be found in $CUDA_ROOT/include. This
+# can either be done using an environment variable (e.g. export CUDA_ROOT=/path/to/cuda) or
+# using a CMake variable (e.g. cmake -DCUDA_ROOT=/path/to/cuda ..).
+#
+# ==================================================================================================
+
+# Sets the possible install locations
+set(CUBLAS_HINTS
+ ${CUDA_ROOT}
+ $ENV{CUDA_ROOT}
+ $ENV{CUDA_TOOLKIT_ROOT_DIR}
+)
+set(CUBLAS_PATHS
+ /usr
+ /usr/local
+ /usr/local/cuda
+)
+
+# Finds the include directories
+find_path(CUBLAS_INCLUDE_DIRS
+ NAMES cublas_v2.h cuda.h
+ HINTS ${CUBLAS_HINTS}
+ PATH_SUFFIXES include inc include/x86_64 include/x64
+ PATHS ${CUBLAS_PATHS}
+ DOC "cuBLAS include header cublas_v2.h"
+)
+mark_as_advanced(CUBLAS_INCLUDE_DIRS)
+
+# Finds the libraries
+find_library(CUDA_LIBRARIES
+ NAMES cudart
+ HINTS ${CUBLAS_HINTS}
+ PATH_SUFFIXES lib lib64 lib/x86_64 lib/x64 lib/x86 lib/Win32 lib/import lib64/import
+ PATHS ${CUBLAS_PATHS}
+ DOC "CUDA library"
+)
+mark_as_advanced(CUDA_LIBRARIES)
+find_library(CUBLAS_LIBRARIES
+ NAMES cublas
+ HINTS ${CUBLAS_HINTS}
+ PATH_SUFFIXES lib lib64 lib/x86_64 lib/x64 lib/x86 lib/Win32 lib/import lib64/import
+ PATHS ${CUBLAS_PATHS}
+ DOC "cuBLAS library"
+)
+mark_as_advanced(CUBLAS_LIBRARIES)
+
+# ==================================================================================================
+
+# Notification messages
+if(NOT CUBLAS_INCLUDE_DIRS)
+ message(STATUS "Could NOT find 'cuBLAS.h', install CUDA/cuBLAS or set CUDA_ROOT")
+endif()
+if(NOT CUDA_LIBRARIES)
+ message(STATUS "Could NOT find CUDA library, install it or set CUDA_ROOT")
+endif()
+if(NOT CUBLAS_LIBRARIES)
+ message(STATUS "Could NOT find cuBLAS library, install it or set CUDA_ROOT")
+endif()
+
+# Determines whether or not cuBLAS was found
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(cuBLAS DEFAULT_MSG CUBLAS_INCLUDE_DIRS CUDA_LIBRARIES CUBLAS_LIBRARIES)
+
+# ==================================================================================================
diff --git a/scripts/generator/generator.py b/scripts/generator/generator.py
index 6ec67052..f7ef4528 100755
--- a/scripts/generator/generator.py
+++ b/scripts/generator/generator.py
@@ -38,11 +38,12 @@ FILES = [
"/src/clblast_c.cpp",
"/test/wrapper_clblas.hpp",
"/test/wrapper_cblas.hpp",
+ "/test/wrapper_cublas.hpp",
"/include/clblast_netlib_c.h",
"/src/clblast_netlib_c.cpp",
]
-HEADER_LINES = [123, 76, 126, 23, 29, 41, 65, 32]
-FOOTER_LINES = [25, 139, 27, 38, 6, 6, 9, 2]
+HEADER_LINES = [122, 77, 126, 23, 29, 41, 29, 65, 32]
+FOOTER_LINES = [25, 139, 27, 38, 6, 6, 6, 9, 2]
HEADER_LINES_DOC = 0
FOOTER_LINES_DOC = 63
@@ -194,7 +195,7 @@ def main(argv):
# Re-writes the body of the file
with open(library_root + FILES[i], "w") as f:
body = ""
- levels = [1, 2, 3] if (i == 4 or i == 5) else [1, 2, 3, 4]
+ levels = [1, 2, 3] if (i == 4 or i == 5 or i == 6) else [1, 2, 3, 4]
for level in levels:
body += cpp.LEVEL_SEPARATORS[level - 1] + "\n"
for routine in ROUTINES[level - 1]:
@@ -211,9 +212,11 @@ def main(argv):
if i == 5:
body += cpp.wrapper_cblas(routine)
if i == 6:
+ body += cpp.wrapper_cublas(routine)
+ if i == 7:
if not routine.batched:
body += cpp.clblast_netlib_c_h(routine)
- if i == 7:
+ if i == 8:
if not routine.batched:
body += cpp.clblast_netlib_c_cc(routine)
f.write("".join(file_header))
diff --git a/scripts/generator/generator/convert.py b/scripts/generator/generator/convert.py
index c0309ec3..07f45669 100644
--- a/scripts/generator/generator/convert.py
+++ b/scripts/generator/generator/convert.py
@@ -56,6 +56,19 @@ def option_to_cblas(x):
}[x]
+def option_to_cublas(x):
+ """As above, but for cuBLAS data-types"""
+ return {
+ 'layout': "Layout",
+ 'a_transpose': "cublasOperation_t",
+ 'b_transpose': "cublasOperation_t",
+ 'ab_transpose': "cublasOperation_t",
+ 'side': "cublasSideMode_t",
+ 'triangle': "cublasFillMode_t",
+ 'diagonal': "cublasDiagType_t",
+ }[x]
+
+
def option_to_documentation(x):
"""Translates an option name to a documentation string"""
return {
diff --git a/scripts/generator/generator/cpp.py b/scripts/generator/generator/cpp.py
index 91fdf458..17e418e3 100644
--- a/scripts/generator/generator/cpp.py
+++ b/scripts/generator/generator/cpp.py
@@ -290,14 +290,69 @@ def wrapper_cblas(routine):
return result
+def wrapper_cublas(routine):
+ """The wrapper to the reference cuBLAS routines (for performance/correctness testing)"""
+ result = ""
+ if routine.has_tests:
+ result += NL + "// Forwards the cuBLAS calls for %s" % routine.short_names_tested() + NL
+ if routine.no_scalars():
+ result += routine.routine_header_wrapper_cublas(routine.template, True, 23) + ";" + NL
+ for flavour in routine.flavours:
+ result += routine.routine_header_wrapper_cublas(flavour, False, 23) + " {" + NL
+
+ # There is a version available in cuBLAS
+ if flavour.precision_name in ["S", "D", "C", "Z"]:
+ indent = " " * (24 + routine.length())
+ arguments = routine.arguments_wrapper_cublas(flavour)
+
+ # Handles row-major
+ if routine.has_layout():
+ result += " if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }" + NL
+
+ # Complex scalars
+ for scalar in routine.scalars:
+ if flavour.is_complex(scalar):
+ cuda_complex = "cuDoubleComplex" if flavour.precision_name == "Z" else "cuComplex"
+ result += " " + cuda_complex + " " + scalar + "_cuda;" + NL
+ result += " " + scalar + "_cuda.x = " + scalar + ".real();" + NL
+ result += " " + scalar + "_cuda.y = " + scalar + ".imag();" + NL
+
+ # Calls the cuBLAS routine
+ result += " auto status = cublas" + flavour.name_cublas() + routine.name + "(handle, "
+ result += ("," + NL + indent).join([a for a in arguments]) + ");" + NL
+ result += " cudaDeviceSynchronize();" + NL
+ result += " return status;"
+
+ # There is no version of this precision available in cuBLAS: report the call as not supported
+ else: # Half-precision
+ result += " return CUBLAS_STATUS_NOT_SUPPORTED;"
+ # indent = " " * (24 + routine.length())
+
+ # # Convert to float (note: also integer buffers are stored as half/float)
+ # for buf in routine.inputs + routine.outputs:
+ # result += " auto " + buf + "_buffer_bis = HalfToFloatBuffer(" + buf + "_buffer, queues[0]);" + NL
+
+ # # Call the float routine
+ # result += " return cublasX" + routine.name + "(handle,"
+ # result += ("," + NL + indent).join([a for a in routine.arguments_half()]) + ");" + NL
+ # result += " cudaDeviceSynchronize();" + NL
+ # result += " return status;"
+
+ # # Convert back to half
+ # for buf in routine.outputs:
+ # result += " FloatToHalfBuffer(" + buf + "_buffer, " + buf + "_buffer_bis, queues[0]);" + NL
+ # result += " return status;"
+
+ # Complete
+ result += NL + "}" + NL
+ return result
+
+
def performance_test(routine, level_string):
"""Generates the body of a performance test for a specific routine"""
result = ""
result += "#include \"test/performance/client.hpp\"" + NL
result += "#include \"test/routines/level" + level_string + "/x" + routine.lowercase_name() + ".hpp\"" + NL + NL
- result += "// Shortcuts to the clblast namespace" + NL
- result += "using float2 = clblast::float2;" + NL
- result += "using double2 = clblast::double2;" + NL + NL
result += "// Main function (not within the clblast namespace)" + NL
result += "int main(int argc, char *argv[]) {" + NL
result += " const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);" + NL
@@ -324,9 +379,6 @@ def correctness_test(routine, level_string):
result = ""
result += "#include \"test/correctness/testblas.hpp\"" + NL
result += "#include \"test/routines/level" + level_string + "/x" + routine.lowercase_name() + ".hpp\"" + NL + NL
- result += "// Shortcuts to the clblast namespace" + NL
- result += "using float2 = clblast::float2;" + NL
- result += "using double2 = clblast::double2;" + NL + NL
result += "// Main function (not within the clblast namespace)" + NL
result += "int main(int argc, char *argv[]) {" + NL
result += " auto errors = size_t{0};" + NL
diff --git a/scripts/generator/generator/datatype.py b/scripts/generator/generator/datatype.py
index cfdbf748..6ac5681a 100644
--- a/scripts/generator/generator/datatype.py
+++ b/scripts/generator/generator/datatype.py
@@ -72,9 +72,11 @@ class DataType:
def test_template(self):
"""Returns the template as used in the correctness/performance tests"""
+ buffer_type = "clblast::" + self.buffer_type if self.buffer_type in [D_FLOAT2, D_DOUBLE2] else self.buffer_type
+ beta_cpp = "clblast::" + self.beta_cpp if self.beta_cpp in [D_FLOAT2, D_DOUBLE2] else self.beta_cpp
if self.buffer_type != self.beta_cpp:
- return "<" + self.buffer_type + "," + self.beta_cpp + ">, " + self.buffer_type + ", " + self.beta_cpp
- return "<" + self.buffer_type + ">, " + self.buffer_type + ", " + self.beta_cpp
+ return "<" + buffer_type + "," + self.beta_cpp + ">, " + buffer_type + ", " + beta_cpp
+ return "<" + buffer_type + ">, " + buffer_type + ", " + beta_cpp
def is_complex(self, scalar):
"""Current scalar is complex"""
@@ -85,6 +87,11 @@ class DataType:
"""Current type is of a non-standard type"""
return self.buffer_type in [D_HALF, D_FLOAT2, D_DOUBLE2]
+ def name_cublas(self):
+ if "i" in self.name:
+ return "I" + self.name[1].lower()
+ return self.name
+
# Regular data-types
H = DataType("H", "H", D_HALF, [D_HALF] * 2 + [D_HALF_OPENCL] * 2, D_HALF) # half (16)
diff --git a/scripts/generator/generator/routine.py b/scripts/generator/generator/routine.py
index 59b2ed73..1c534611 100644
--- a/scripts/generator/generator/routine.py
+++ b/scripts/generator/generator/routine.py
@@ -197,6 +197,10 @@ class Routine:
"""Determines whether or not this routine has scalar arguments (alpha/beta)"""
return self.scalars == []
+ def has_layout(self):
+ """Determines whether the layout is an argument"""
+ return "layout" in self.options
+
def short_names(self):
"""Returns the upper-case names of these routines (all flavours)"""
return "/".join([f.name + self.upper_name() for f in self.flavours])
@@ -257,7 +261,7 @@ class Routine:
return []
def buffer_def_wrapper_cl(self, name, flavour):
- """As above but with data-types"""
+ """As above but for OpenCL"""
prefix = "const " if name in self.inputs else ""
if name in self.inputs or name in self.outputs:
a = [prefix + "Buffer<" + flavour.buffer_type + ">& " + name + "_buffer"]
@@ -266,6 +270,16 @@ class Routine:
return [", ".join(a + b + c)]
return []
+ def buffer_def_wrapper_cuda(self, name, flavour):
+ """As above but for CUDA"""
+ prefix = "const " if name in self.inputs else ""
+ if name in self.inputs or name in self.outputs:
+ a = [prefix + flavour.buffer_type + "* " + name + "_buffer"]
+ b = ["const size_t " + name + "_offset"]
+ c = ["const size_t " + name + "_" + self.postfix(name)] if name not in self.buffers_without_ld_inc() else []
+ return [", ".join(a + b + c)]
+ return []
+
def buffer_def_vector(self, name, flavour):
"""As above but as vectors"""
prefix = "const " if name in self.inputs else ""
@@ -329,6 +343,32 @@ class Routine:
return [", ".join(a + c)]
return []
+ def buffer_wrapper_cublas(self, name, flavour):
+ """As above but for the cuBLAS wrapper"""
+ prefix = "const " if name in self.inputs else ""
+ if name in self.inputs or name in self.outputs:
+ if name in self.index_buffers():
+ a = ["reinterpret_cast<int*>(&" + name + "_buffer[" + name + "_offset])"]
+ elif name in self.outputs and flavour.name in ["Sc", "Dz"]:
+ dtype = "float" if flavour.name == "Sc" else "double"
+ a = ["reinterpret_cast<" + dtype + "*>(&" + name + "_buffer[" + name + "_offset])"]
+ elif flavour.precision_name in ["C", "Z"]:
+ cuda_complex = "cuDoubleComplex" if flavour.precision_name == "Z" else "cuComplex"
+ a = ["reinterpret_cast<" + prefix + cuda_complex + "*>" +
+ "(&" + name + "_buffer[" + name + "_offset])"]
+ else:
+ a = ["&" + name + "_buffer[" + name + "_offset]"]
+ c = []
+ if name in ["x", "y"]:
+ c = ["static_cast<int>(" + name + "_" + self.postfix(name) + ")"]
+ elif name in ["a", "b", "c"]:
+ c = [name + "_" + self.postfix(name)]
+ result = [", ".join(a + c)]
+ if self.name == "trmm" and name == "a":
+ result *= 2
+ return result
+ return []
+
def buffer_type(self, name):
"""As above, but only data-types"""
prefix = "const " if (name in self.inputs) else ""
@@ -407,6 +447,14 @@ class Routine:
return [name]
return []
+ def scalar_use_wrapper_cublas(self, name, flavour):
+ """As above, but for the cuBLAS wrapper"""
+ if name in self.scalars:
+ if flavour.is_complex(name):
+ return ["&" + name + "_cuda"]
+ return ["&" + name]
+ return []
+
def scalar_def(self, name, flavour):
"""Retrieves the definition of a scalar (alpha/beta)"""
if name in self.scalars:
@@ -465,6 +513,12 @@ class Routine:
return [", ".join([s for s in self.sizes])]
return []
+ def sizes_list_as_int(self):
+ """Retrieves a list of comma-separated sizes (m, n, k) cast to integers"""
+ if self.sizes:
+ return [", ".join(["static_cast<int>(" + s + ")" for s in self.sizes])]
+ return []
+
def sizes_def(self):
"""Retrieves the definition of the sizes (m,n,k)"""
if self.sizes:
@@ -496,6 +550,15 @@ class Routine:
return [", ".join(self.options)]
return []
+ def options_list_no_layout(self):
+ """Retrieves a list of options, excluding the layout"""
+ options = self.options[:]
+ if "layout" in options:
+ options.remove("layout")
+ if options:
+ return [", ".join(options)]
+ return []
+
def options_cast(self, indent):
"""As above, but now casted to CLBlast data-types"""
if self.options:
@@ -531,6 +594,13 @@ class Routine:
return [", ".join(definitions)]
return []
+ def options_def_wrapper_cublas(self):
+ """As above, but now using cuBLAS data-types"""
+ if self.options:
+ definitions = ["const " + convert.option_to_cublas(o) + " " + o for o in self.options]
+ return [", ".join(definitions)]
+ return []
+
def options_type(self):
"""Retrieves the types of the options (layout, transpose, side, etc.)"""
if self.options:
@@ -615,7 +685,7 @@ class Routine:
def arguments_wrapper_cblas(self, flavour):
"""As above, but for the CBLAS wrapper"""
- return (self.options_list() + self.sizes_list() +
+ return (self.options_list() + self.sizes_list_as_int() +
self.scalar_use_wrapper_cblas("alpha", flavour) +
list(chain(*[self.buffer_wrapper_cblas(b, flavour) for b in self.buffers_first()])) +
self.scalar_use_wrapper_cblas("beta", flavour) +
@@ -623,6 +693,17 @@ class Routine:
list(chain(*[self.buffer_wrapper_cblas(b, flavour) for b in self.scalar_buffers_second()])) +
list(chain(*[self.scalar_use_wrapper_cblas(s, flavour) for s in self.other_scalars()])))
+ def arguments_wrapper_cublas(self, flavour):
+ """As above, but for the cuBLAS wrapper"""
+ return (self.options_list_no_layout() + self.sizes_list_as_int() +
+ self.scalar_use_wrapper_cublas("alpha", flavour) +
+ list(chain(*[self.buffer_wrapper_cublas(b, flavour) for b in self.buffers_first()])) +
+ self.scalar_use_wrapper_cublas("beta", flavour) +
+ list(chain(*[self.buffer_wrapper_cublas(b, flavour) for b in self.buffers_second()])) +
+ list(chain(*[self.buffer_wrapper_cublas(b, flavour) for b in self.scalar_buffers_first()])) +
+ list(chain(*[self.buffer_wrapper_cublas(b, flavour) for b in self.scalar_buffers_second()])) +
+ list(chain(*[self.scalar_use_wrapper_cublas(s, flavour) for s in self.other_scalars()])))
+
def arguments_def(self, flavour):
"""Retrieves a combination of all the argument definitions"""
return (self.options_def() + self.sizes_def() +
@@ -683,6 +764,17 @@ class Routine:
list(chain(*[self.buffer_def_vector(b, flavour) for b in self.scalar_buffers_second()])) +
list(chain(*[self.scalar_def_plain(s, flavour) for s in self.other_scalars()])))
+ def arguments_def_wrapper_cublas(self, flavour):
+ """As above, but for the cuBLAS wrapper with plain data-types"""
+ return (self.options_def_wrapper_cublas() + self.sizes_def() +
+ list(chain(*[self.buffer_def_wrapper_cuda(b, flavour) for b in self.scalar_buffers_first()])) +
+ self.scalar_def_plain("alpha", flavour) +
+ list(chain(*[self.buffer_def_wrapper_cuda(b, flavour) for b in self.buffers_first()])) +
+ self.scalar_def_plain("beta", flavour) +
+ list(chain(*[self.buffer_def_wrapper_cuda(b, flavour) for b in self.buffers_second()])) +
+ list(chain(*[self.buffer_def_wrapper_cuda(b, flavour) for b in self.scalar_buffers_second()])) +
+ list(chain(*[self.scalar_def_plain(s, flavour) for s in self.other_scalars()])))
+
def arguments_type(self, flavour):
"""Retrieves a combination of all the argument types"""
return (self.options_type() + self.sizes_type() +
@@ -781,3 +873,17 @@ class Routine:
result = "void cblasX" + self.name + "("
result += (",\n" + indent).join([a for a in self.arguments_def_wrapper_cblas(flavour)]) + ")"
return result
+
+ def routine_header_wrapper_cublas(self, flavour, def_only, spaces):
+ """As above, but now for the cuBLAS wrapper"""
+ template = "<" + flavour.template + ">" if self.no_scalars() and not def_only else ""
+ indent = " " * (spaces + self.length() + len(template))
+ result = ""
+ if self.no_scalars():
+ result += "template <"
+ if def_only:
+ result += flavour.name
+ result += ">\n"
+ result += "cublasStatus_t cublasX" + self.name + template + "(cublasHandle_t handle, "
+ result += (",\n" + indent).join([a for a in self.arguments_def_wrapper_cublas(flavour)]) + ")"
+ return result
diff --git a/src/utilities/utilities.hpp b/src/utilities/utilities.hpp
index 535560a3..b40ec541 100644
--- a/src/utilities/utilities.hpp
+++ b/src/utilities/utilities.hpp
@@ -81,6 +81,7 @@ constexpr auto kArgFraction = "fraction";
// The client-specific arguments in string form
constexpr auto kArgCompareclblas = "clblas";
constexpr auto kArgComparecblas = "cblas";
+constexpr auto kArgComparecublas = "cublas";
constexpr auto kArgStepSize = "step";
constexpr auto kArgNumSteps = "num_steps";
constexpr auto kArgNumRuns = "runs";
@@ -188,9 +189,13 @@ struct Arguments {
// Client-specific arguments
int compare_clblas = 1;
int compare_cblas = 1;
+ int compare_cublas = 1;
size_t step = 1;
size_t num_steps = 0;
size_t num_runs = 10;
+ #ifdef CLBLAST_REF_CUBLAS
+ void* cublas_handle; // cublasHandle_t
+ #endif
// Common arguments
size_t platform_id = 0;
size_t device_id = 0;
diff --git a/test/correctness/misc/override_parameters.cpp b/test/correctness/misc/override_parameters.cpp
index e6eebef7..4283c039 100644
--- a/test/correctness/misc/override_parameters.cpp
+++ b/test/correctness/misc/override_parameters.cpp
@@ -129,15 +129,11 @@ size_t RunOverrideTests(int argc, char *argv[], const bool silent, const std::st
// =================================================================================================
} // namespace clblast
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
errors += clblast::RunOverrideTests<float>(argc, argv, false, "SGEMM");
- errors += clblast::RunOverrideTests<float2>(argc, argv, true, "CGEMM");
+ errors += clblast::RunOverrideTests<clblast::float2>(argc, argv, true, "CGEMM");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level1/xamax.cpp b/test/correctness/routines/level1/xamax.cpp
index 607637e8..d940ae7a 100644
--- a/test/correctness/routines/level1/xamax.cpp
+++ b/test/correctness/routines/level1/xamax.cpp
@@ -12,17 +12,13 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level1/xamax.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
errors += clblast::RunTests<clblast::TestXamax<float>, float, float>(argc, argv, false, "iSAMAX");
errors += clblast::RunTests<clblast::TestXamax<double>, double, double>(argc, argv, true, "iDAMAX");
- errors += clblast::RunTests<clblast::TestXamax<float2>, float2, float2>(argc, argv, true, "iCAMAX");
- errors += clblast::RunTests<clblast::TestXamax<double2>, double2, double2>(argc, argv, true, "iZAMAX");
+ errors += clblast::RunTests<clblast::TestXamax<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "iCAMAX");
+ errors += clblast::RunTests<clblast::TestXamax<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "iZAMAX");
errors += clblast::RunTests<clblast::TestXamax<half>, half, half>(argc, argv, true, "iHAMAX");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level1/xasum.cpp b/test/correctness/routines/level1/xasum.cpp
index e22e42a6..b969d662 100644
--- a/test/correctness/routines/level1/xasum.cpp
+++ b/test/correctness/routines/level1/xasum.cpp
@@ -12,17 +12,13 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level1/xasum.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
errors += clblast::RunTests<clblast::TestXasum<float>, float, float>(argc, argv, false, "SASUM");
errors += clblast::RunTests<clblast::TestXasum<double>, double, double>(argc, argv, true, "DASUM");
- errors += clblast::RunTests<clblast::TestXasum<float2>, float2, float2>(argc, argv, true, "ScASUM");
- errors += clblast::RunTests<clblast::TestXasum<double2>, double2, double2>(argc, argv, true, "DzASUM");
+ errors += clblast::RunTests<clblast::TestXasum<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "ScASUM");
+ errors += clblast::RunTests<clblast::TestXasum<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "DzASUM");
errors += clblast::RunTests<clblast::TestXasum<half>, half, half>(argc, argv, true, "HASUM");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level1/xaxpy.cpp b/test/correctness/routines/level1/xaxpy.cpp
index 064172fa..6f4f34fb 100644
--- a/test/correctness/routines/level1/xaxpy.cpp
+++ b/test/correctness/routines/level1/xaxpy.cpp
@@ -12,17 +12,13 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level1/xaxpy.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
errors += clblast::RunTests<clblast::TestXaxpy<float>, float, float>(argc, argv, false, "SAXPY");
errors += clblast::RunTests<clblast::TestXaxpy<double>, double, double>(argc, argv, true, "DAXPY");
- errors += clblast::RunTests<clblast::TestXaxpy<float2>, float2, float2>(argc, argv, true, "CAXPY");
- errors += clblast::RunTests<clblast::TestXaxpy<double2>, double2, double2>(argc, argv, true, "ZAXPY");
+ errors += clblast::RunTests<clblast::TestXaxpy<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "CAXPY");
+ errors += clblast::RunTests<clblast::TestXaxpy<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZAXPY");
errors += clblast::RunTests<clblast::TestXaxpy<half>, half, half>(argc, argv, true, "HAXPY");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level1/xcopy.cpp b/test/correctness/routines/level1/xcopy.cpp
index e6f2581b..e6e94d34 100644
--- a/test/correctness/routines/level1/xcopy.cpp
+++ b/test/correctness/routines/level1/xcopy.cpp
@@ -12,17 +12,13 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level1/xcopy.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
errors += clblast::RunTests<clblast::TestXcopy<float>, float, float>(argc, argv, false, "SCOPY");
errors += clblast::RunTests<clblast::TestXcopy<double>, double, double>(argc, argv, true, "DCOPY");
- errors += clblast::RunTests<clblast::TestXcopy<float2>, float2, float2>(argc, argv, true, "CCOPY");
- errors += clblast::RunTests<clblast::TestXcopy<double2>, double2, double2>(argc, argv, true, "ZCOPY");
+ errors += clblast::RunTests<clblast::TestXcopy<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "CCOPY");
+ errors += clblast::RunTests<clblast::TestXcopy<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZCOPY");
errors += clblast::RunTests<clblast::TestXcopy<half>, half, half>(argc, argv, true, "HCOPY");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level1/xdot.cpp b/test/correctness/routines/level1/xdot.cpp
index 080250cb..8dccbf26 100644
--- a/test/correctness/routines/level1/xdot.cpp
+++ b/test/correctness/routines/level1/xdot.cpp
@@ -12,10 +12,6 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level1/xdot.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
diff --git a/test/correctness/routines/level1/xdotc.cpp b/test/correctness/routines/level1/xdotc.cpp
index 2a7bbeca..59eedddc 100644
--- a/test/correctness/routines/level1/xdotc.cpp
+++ b/test/correctness/routines/level1/xdotc.cpp
@@ -12,15 +12,11 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level1/xdotc.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
- errors += clblast::RunTests<clblast::TestXdotc<float2>, float2, float2>(argc, argv, false, "CDOTC");
- errors += clblast::RunTests<clblast::TestXdotc<double2>, double2, double2>(argc, argv, true, "ZDOTC");
+ errors += clblast::RunTests<clblast::TestXdotc<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, false, "CDOTC");
+ errors += clblast::RunTests<clblast::TestXdotc<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZDOTC");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level1/xdotu.cpp b/test/correctness/routines/level1/xdotu.cpp
index 1047d021..4392326d 100644
--- a/test/correctness/routines/level1/xdotu.cpp
+++ b/test/correctness/routines/level1/xdotu.cpp
@@ -12,15 +12,11 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level1/xdotu.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
- errors += clblast::RunTests<clblast::TestXdotu<float2>, float2, float2>(argc, argv, false, "CDOTU");
- errors += clblast::RunTests<clblast::TestXdotu<double2>, double2, double2>(argc, argv, true, "ZDOTU");
+ errors += clblast::RunTests<clblast::TestXdotu<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, false, "CDOTU");
+ errors += clblast::RunTests<clblast::TestXdotu<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZDOTU");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level1/xnrm2.cpp b/test/correctness/routines/level1/xnrm2.cpp
index 142fa7ba..46ca1526 100644
--- a/test/correctness/routines/level1/xnrm2.cpp
+++ b/test/correctness/routines/level1/xnrm2.cpp
@@ -12,17 +12,13 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level1/xnrm2.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
errors += clblast::RunTests<clblast::TestXnrm2<float>, float, float>(argc, argv, false, "SNRM2");
errors += clblast::RunTests<clblast::TestXnrm2<double>, double, double>(argc, argv, true, "DNRM2");
- errors += clblast::RunTests<clblast::TestXnrm2<float2>, float2, float2>(argc, argv, true, "ScNRM2");
- errors += clblast::RunTests<clblast::TestXnrm2<double2>, double2, double2>(argc, argv, true, "DzNRM2");
+ errors += clblast::RunTests<clblast::TestXnrm2<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "ScNRM2");
+ errors += clblast::RunTests<clblast::TestXnrm2<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "DzNRM2");
errors += clblast::RunTests<clblast::TestXnrm2<half>, half, half>(argc, argv, true, "HNRM2");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level1/xrot.cpp b/test/correctness/routines/level1/xrot.cpp
index 5af358eb..d5eb6516 100644
--- a/test/correctness/routines/level1/xrot.cpp
+++ b/test/correctness/routines/level1/xrot.cpp
@@ -12,10 +12,6 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level1/xrot.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
diff --git a/test/correctness/routines/level1/xrotg.cpp b/test/correctness/routines/level1/xrotg.cpp
index ad23a554..ec544eab 100644
--- a/test/correctness/routines/level1/xrotg.cpp
+++ b/test/correctness/routines/level1/xrotg.cpp
@@ -12,10 +12,6 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level1/xrotg.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
diff --git a/test/correctness/routines/level1/xrotm.cpp b/test/correctness/routines/level1/xrotm.cpp
index 4f7e8f15..7f2d7ce6 100644
--- a/test/correctness/routines/level1/xrotm.cpp
+++ b/test/correctness/routines/level1/xrotm.cpp
@@ -12,10 +12,6 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level1/xrotm.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
diff --git a/test/correctness/routines/level1/xrotmg.cpp b/test/correctness/routines/level1/xrotmg.cpp
index ca89bc12..4ef6e67d 100644
--- a/test/correctness/routines/level1/xrotmg.cpp
+++ b/test/correctness/routines/level1/xrotmg.cpp
@@ -12,10 +12,6 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level1/xrotmg.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
diff --git a/test/correctness/routines/level1/xscal.cpp b/test/correctness/routines/level1/xscal.cpp
index 939524be..c9788142 100644
--- a/test/correctness/routines/level1/xscal.cpp
+++ b/test/correctness/routines/level1/xscal.cpp
@@ -12,17 +12,13 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level1/xscal.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
errors += clblast::RunTests<clblast::TestXscal<float>, float, float>(argc, argv, false, "SSCAL");
errors += clblast::RunTests<clblast::TestXscal<double>, double, double>(argc, argv, true, "DSCAL");
- errors += clblast::RunTests<clblast::TestXscal<float2>, float2, float2>(argc, argv, true, "CSCAL");
- errors += clblast::RunTests<clblast::TestXscal<double2>, double2, double2>(argc, argv, true, "ZSCAL");
+ errors += clblast::RunTests<clblast::TestXscal<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "CSCAL");
+ errors += clblast::RunTests<clblast::TestXscal<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZSCAL");
errors += clblast::RunTests<clblast::TestXscal<half>, half, half>(argc, argv, true, "HSCAL");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level1/xswap.cpp b/test/correctness/routines/level1/xswap.cpp
index 446f3d65..ee694a08 100644
--- a/test/correctness/routines/level1/xswap.cpp
+++ b/test/correctness/routines/level1/xswap.cpp
@@ -12,17 +12,13 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level1/xswap.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
errors += clblast::RunTests<clblast::TestXswap<float>, float, float>(argc, argv, false, "SSWAP");
errors += clblast::RunTests<clblast::TestXswap<double>, double, double>(argc, argv, true, "DSWAP");
- errors += clblast::RunTests<clblast::TestXswap<float2>, float2, float2>(argc, argv, true, "CSWAP");
- errors += clblast::RunTests<clblast::TestXswap<double2>, double2, double2>(argc, argv, true, "ZSWAP");
+ errors += clblast::RunTests<clblast::TestXswap<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "CSWAP");
+ errors += clblast::RunTests<clblast::TestXswap<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZSWAP");
errors += clblast::RunTests<clblast::TestXswap<half>, half, half>(argc, argv, true, "HSWAP");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level2/xgbmv.cpp b/test/correctness/routines/level2/xgbmv.cpp
index 8c49bc65..6aac283b 100644
--- a/test/correctness/routines/level2/xgbmv.cpp
+++ b/test/correctness/routines/level2/xgbmv.cpp
@@ -12,17 +12,13 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level2/xgbmv.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
errors += clblast::RunTests<clblast::TestXgbmv<float>, float, float>(argc, argv, false, "SGBMV");
errors += clblast::RunTests<clblast::TestXgbmv<double>, double, double>(argc, argv, true, "DGBMV");
- errors += clblast::RunTests<clblast::TestXgbmv<float2>, float2, float2>(argc, argv, true, "CGBMV");
- errors += clblast::RunTests<clblast::TestXgbmv<double2>, double2, double2>(argc, argv, true, "ZGBMV");
+ errors += clblast::RunTests<clblast::TestXgbmv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "CGBMV");
+ errors += clblast::RunTests<clblast::TestXgbmv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZGBMV");
errors += clblast::RunTests<clblast::TestXgbmv<half>, half, half>(argc, argv, true, "HGBMV");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level2/xgemv.cpp b/test/correctness/routines/level2/xgemv.cpp
index 902ae777..66994b89 100644
--- a/test/correctness/routines/level2/xgemv.cpp
+++ b/test/correctness/routines/level2/xgemv.cpp
@@ -12,17 +12,13 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level2/xgemv.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
errors += clblast::RunTests<clblast::TestXgemv<float>, float, float>(argc, argv, false, "SGEMV");
errors += clblast::RunTests<clblast::TestXgemv<double>, double, double>(argc, argv, true, "DGEMV");
- errors += clblast::RunTests<clblast::TestXgemv<float2>, float2, float2>(argc, argv, true, "CGEMV");
- errors += clblast::RunTests<clblast::TestXgemv<double2>, double2, double2>(argc, argv, true, "ZGEMV");
+ errors += clblast::RunTests<clblast::TestXgemv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "CGEMV");
+ errors += clblast::RunTests<clblast::TestXgemv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZGEMV");
errors += clblast::RunTests<clblast::TestXgemv<half>, half, half>(argc, argv, true, "HGEMV");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level2/xger.cpp b/test/correctness/routines/level2/xger.cpp
index ce61bbcb..3b5d16e9 100644
--- a/test/correctness/routines/level2/xger.cpp
+++ b/test/correctness/routines/level2/xger.cpp
@@ -12,10 +12,6 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level2/xger.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
diff --git a/test/correctness/routines/level2/xgerc.cpp b/test/correctness/routines/level2/xgerc.cpp
index b747f20d..42f6bb45 100644
--- a/test/correctness/routines/level2/xgerc.cpp
+++ b/test/correctness/routines/level2/xgerc.cpp
@@ -12,15 +12,11 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level2/xgerc.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
- errors += clblast::RunTests<clblast::TestXgerc<float2>, float2, float2>(argc, argv, false, "CGERC");
- errors += clblast::RunTests<clblast::TestXgerc<double2>, double2, double2>(argc, argv, true, "ZGERC");
+ errors += clblast::RunTests<clblast::TestXgerc<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, false, "CGERC");
+ errors += clblast::RunTests<clblast::TestXgerc<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZGERC");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level2/xgeru.cpp b/test/correctness/routines/level2/xgeru.cpp
index f80c1e2b..f167eff5 100644
--- a/test/correctness/routines/level2/xgeru.cpp
+++ b/test/correctness/routines/level2/xgeru.cpp
@@ -12,15 +12,11 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level2/xgeru.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
- errors += clblast::RunTests<clblast::TestXgeru<float2>, float2, float2>(argc, argv, false, "CGERU");
- errors += clblast::RunTests<clblast::TestXgeru<double2>, double2, double2>(argc, argv, true, "ZGERU");
+ errors += clblast::RunTests<clblast::TestXgeru<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, false, "CGERU");
+ errors += clblast::RunTests<clblast::TestXgeru<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZGERU");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level2/xhbmv.cpp b/test/correctness/routines/level2/xhbmv.cpp
index a4885c01..168d9474 100644
--- a/test/correctness/routines/level2/xhbmv.cpp
+++ b/test/correctness/routines/level2/xhbmv.cpp
@@ -12,15 +12,11 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level2/xhbmv.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
- errors += clblast::RunTests<clblast::TestXhbmv<float2>, float2, float2>(argc, argv, false, "CHBMV");
- errors += clblast::RunTests<clblast::TestXhbmv<double2>, double2, double2>(argc, argv, true, "ZHBMV");
+ errors += clblast::RunTests<clblast::TestXhbmv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, false, "CHBMV");
+ errors += clblast::RunTests<clblast::TestXhbmv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZHBMV");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level2/xhemv.cpp b/test/correctness/routines/level2/xhemv.cpp
index 4318ffee..eabdf67d 100644
--- a/test/correctness/routines/level2/xhemv.cpp
+++ b/test/correctness/routines/level2/xhemv.cpp
@@ -12,15 +12,11 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level2/xhemv.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
- errors += clblast::RunTests<clblast::TestXhemv<float2>, float2, float2>(argc, argv, false, "CHEMV");
- errors += clblast::RunTests<clblast::TestXhemv<double2>, double2, double2>(argc, argv, true, "ZHEMV");
+ errors += clblast::RunTests<clblast::TestXhemv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, false, "CHEMV");
+ errors += clblast::RunTests<clblast::TestXhemv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZHEMV");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level2/xher.cpp b/test/correctness/routines/level2/xher.cpp
index fe37bd76..a47a45ac 100644
--- a/test/correctness/routines/level2/xher.cpp
+++ b/test/correctness/routines/level2/xher.cpp
@@ -12,15 +12,11 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level2/xher.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
- errors += clblast::RunTests<clblast::TestXher<float2,float>, float2, float>(argc, argv, false, "CHER");
- errors += clblast::RunTests<clblast::TestXher<double2,double>, double2, double>(argc, argv, true, "ZHER");
+ errors += clblast::RunTests<clblast::TestXher<clblast::float2,float>, clblast::float2, float>(argc, argv, false, "CHER");
+ errors += clblast::RunTests<clblast::TestXher<clblast::double2,double>, clblast::double2, double>(argc, argv, true, "ZHER");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level2/xher2.cpp b/test/correctness/routines/level2/xher2.cpp
index 0b4af4d0..544ab16d 100644
--- a/test/correctness/routines/level2/xher2.cpp
+++ b/test/correctness/routines/level2/xher2.cpp
@@ -12,15 +12,11 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level2/xher2.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
- errors += clblast::RunTests<clblast::TestXher2<float2>, float2, float2>(argc, argv, false, "CHER2");
- errors += clblast::RunTests<clblast::TestXher2<double2>, double2, double2>(argc, argv, true, "ZHER2");
+ errors += clblast::RunTests<clblast::TestXher2<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, false, "CHER2");
+ errors += clblast::RunTests<clblast::TestXher2<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZHER2");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level2/xhpmv.cpp b/test/correctness/routines/level2/xhpmv.cpp
index dd77df71..30d23b8f 100644
--- a/test/correctness/routines/level2/xhpmv.cpp
+++ b/test/correctness/routines/level2/xhpmv.cpp
@@ -12,15 +12,11 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level2/xhpmv.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
- errors += clblast::RunTests<clblast::TestXhpmv<float2>, float2, float2>(argc, argv, false, "CHPMV");
- errors += clblast::RunTests<clblast::TestXhpmv<double2>, double2, double2>(argc, argv, true, "ZHPMV");
+ errors += clblast::RunTests<clblast::TestXhpmv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, false, "CHPMV");
+ errors += clblast::RunTests<clblast::TestXhpmv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZHPMV");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level2/xhpr.cpp b/test/correctness/routines/level2/xhpr.cpp
index 5a3f615f..ed876857 100644
--- a/test/correctness/routines/level2/xhpr.cpp
+++ b/test/correctness/routines/level2/xhpr.cpp
@@ -12,15 +12,11 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level2/xhpr.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
- errors += clblast::RunTests<clblast::TestXhpr<float2,float>, float2, float>(argc, argv, false, "CHPR");
- errors += clblast::RunTests<clblast::TestXhpr<double2,double>, double2, double>(argc, argv, true, "ZHPR");
+ errors += clblast::RunTests<clblast::TestXhpr<clblast::float2,float>, clblast::float2, float>(argc, argv, false, "CHPR");
+ errors += clblast::RunTests<clblast::TestXhpr<clblast::double2,double>, clblast::double2, double>(argc, argv, true, "ZHPR");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level2/xhpr2.cpp b/test/correctness/routines/level2/xhpr2.cpp
index 8218b444..b3bd167a 100644
--- a/test/correctness/routines/level2/xhpr2.cpp
+++ b/test/correctness/routines/level2/xhpr2.cpp
@@ -12,15 +12,11 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level2/xhpr2.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
- errors += clblast::RunTests<clblast::TestXhpr2<float2>, float2, float2>(argc, argv, false, "CHPR2");
- errors += clblast::RunTests<clblast::TestXhpr2<double2>, double2, double2>(argc, argv, true, "ZHPR2");
+ errors += clblast::RunTests<clblast::TestXhpr2<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, false, "CHPR2");
+ errors += clblast::RunTests<clblast::TestXhpr2<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZHPR2");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level2/xsbmv.cpp b/test/correctness/routines/level2/xsbmv.cpp
index 7918cb21..3b6b3972 100644
--- a/test/correctness/routines/level2/xsbmv.cpp
+++ b/test/correctness/routines/level2/xsbmv.cpp
@@ -12,10 +12,6 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level2/xsbmv.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
diff --git a/test/correctness/routines/level2/xspmv.cpp b/test/correctness/routines/level2/xspmv.cpp
index 78210660..9dccdbc1 100644
--- a/test/correctness/routines/level2/xspmv.cpp
+++ b/test/correctness/routines/level2/xspmv.cpp
@@ -12,10 +12,6 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level2/xspmv.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
diff --git a/test/correctness/routines/level2/xspr.cpp b/test/correctness/routines/level2/xspr.cpp
index d05adf34..9cf242c1 100644
--- a/test/correctness/routines/level2/xspr.cpp
+++ b/test/correctness/routines/level2/xspr.cpp
@@ -12,10 +12,6 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level2/xspr.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
diff --git a/test/correctness/routines/level2/xspr2.cpp b/test/correctness/routines/level2/xspr2.cpp
index caa46a09..2650bd03 100644
--- a/test/correctness/routines/level2/xspr2.cpp
+++ b/test/correctness/routines/level2/xspr2.cpp
@@ -12,10 +12,6 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level2/xspr2.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
diff --git a/test/correctness/routines/level2/xsymv.cpp b/test/correctness/routines/level2/xsymv.cpp
index 978a5f8a..3f0a8f8b 100644
--- a/test/correctness/routines/level2/xsymv.cpp
+++ b/test/correctness/routines/level2/xsymv.cpp
@@ -12,10 +12,6 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level2/xsymv.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
diff --git a/test/correctness/routines/level2/xsyr.cpp b/test/correctness/routines/level2/xsyr.cpp
index 244dbfb4..15ac1f14 100644
--- a/test/correctness/routines/level2/xsyr.cpp
+++ b/test/correctness/routines/level2/xsyr.cpp
@@ -12,10 +12,6 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level2/xsyr.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
diff --git a/test/correctness/routines/level2/xsyr2.cpp b/test/correctness/routines/level2/xsyr2.cpp
index 422e67ad..74806219 100644
--- a/test/correctness/routines/level2/xsyr2.cpp
+++ b/test/correctness/routines/level2/xsyr2.cpp
@@ -12,10 +12,6 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level2/xsyr2.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
diff --git a/test/correctness/routines/level2/xtbmv.cpp b/test/correctness/routines/level2/xtbmv.cpp
index 491708ec..667ae732 100644
--- a/test/correctness/routines/level2/xtbmv.cpp
+++ b/test/correctness/routines/level2/xtbmv.cpp
@@ -12,17 +12,13 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level2/xtbmv.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
errors += clblast::RunTests<clblast::TestXtbmv<float>, float, float>(argc, argv, false, "STBMV");
errors += clblast::RunTests<clblast::TestXtbmv<double>, double, double>(argc, argv, true, "DTBMV");
- errors += clblast::RunTests<clblast::TestXtbmv<float2>, float2, float2>(argc, argv, true, "CTBMV");
- errors += clblast::RunTests<clblast::TestXtbmv<double2>, double2, double2>(argc, argv, true, "ZTBMV");
+ errors += clblast::RunTests<clblast::TestXtbmv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "CTBMV");
+ errors += clblast::RunTests<clblast::TestXtbmv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZTBMV");
errors += clblast::RunTests<clblast::TestXtbmv<half>, half, half>(argc, argv, true, "HTBMV");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level2/xtbsv.cpp b/test/correctness/routines/level2/xtbsv.cpp
index 12b5dca5..5cfc6942 100644
--- a/test/correctness/routines/level2/xtbsv.cpp
+++ b/test/correctness/routines/level2/xtbsv.cpp
@@ -12,17 +12,13 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level2/xtbsv.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
errors += clblast::RunTests<clblast::TestXtbsv<float>, float, float>(argc, argv, false, "STBSV");
errors += clblast::RunTests<clblast::TestXtbsv<double>, double, double>(argc, argv, true, "DTBSV");
- errors += clblast::RunTests<clblast::TestXtbsv<float2>, float2, float2>(argc, argv, true, "CTBSV");
- errors += clblast::RunTests<clblast::TestXtbsv<double2>, double2, double2>(argc, argv, true, "ZTBSV");
+ errors += clblast::RunTests<clblast::TestXtbsv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "CTBSV");
+ errors += clblast::RunTests<clblast::TestXtbsv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZTBSV");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level2/xtpmv.cpp b/test/correctness/routines/level2/xtpmv.cpp
index b89f0adc..89056678 100644
--- a/test/correctness/routines/level2/xtpmv.cpp
+++ b/test/correctness/routines/level2/xtpmv.cpp
@@ -12,17 +12,13 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level2/xtpmv.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
errors += clblast::RunTests<clblast::TestXtpmv<float>, float, float>(argc, argv, false, "STPMV");
errors += clblast::RunTests<clblast::TestXtpmv<double>, double, double>(argc, argv, true, "DTPMV");
- errors += clblast::RunTests<clblast::TestXtpmv<float2>, float2, float2>(argc, argv, true, "CTPMV");
- errors += clblast::RunTests<clblast::TestXtpmv<double2>, double2, double2>(argc, argv, true, "ZTPMV");
+ errors += clblast::RunTests<clblast::TestXtpmv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "CTPMV");
+ errors += clblast::RunTests<clblast::TestXtpmv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZTPMV");
errors += clblast::RunTests<clblast::TestXtpmv<half>, half, half>(argc, argv, true, "HTPMV");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level2/xtpsv.cpp b/test/correctness/routines/level2/xtpsv.cpp
index 6e6e7c85..28c9fe39 100644
--- a/test/correctness/routines/level2/xtpsv.cpp
+++ b/test/correctness/routines/level2/xtpsv.cpp
@@ -12,17 +12,13 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level2/xtpsv.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
errors += clblast::RunTests<clblast::TestXtpsv<float>, float, float>(argc, argv, false, "STPSV");
errors += clblast::RunTests<clblast::TestXtpsv<double>, double, double>(argc, argv, true, "DTPSV");
- errors += clblast::RunTests<clblast::TestXtpsv<float2>, float2, float2>(argc, argv, true, "CTPSV");
- errors += clblast::RunTests<clblast::TestXtpsv<double2>, double2, double2>(argc, argv, true, "ZTPSV");
+ errors += clblast::RunTests<clblast::TestXtpsv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "CTPSV");
+ errors += clblast::RunTests<clblast::TestXtpsv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZTPSV");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level2/xtrmv.cpp b/test/correctness/routines/level2/xtrmv.cpp
index 819f5cad..b1a414af 100644
--- a/test/correctness/routines/level2/xtrmv.cpp
+++ b/test/correctness/routines/level2/xtrmv.cpp
@@ -12,17 +12,13 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level2/xtrmv.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
errors += clblast::RunTests<clblast::TestXtrmv<float>, float, float>(argc, argv, false, "STRMV");
errors += clblast::RunTests<clblast::TestXtrmv<double>, double, double>(argc, argv, true, "DTRMV");
- errors += clblast::RunTests<clblast::TestXtrmv<float2>, float2, float2>(argc, argv, true, "CTRMV");
- errors += clblast::RunTests<clblast::TestXtrmv<double2>, double2, double2>(argc, argv, true, "ZTRMV");
+ errors += clblast::RunTests<clblast::TestXtrmv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "CTRMV");
+ errors += clblast::RunTests<clblast::TestXtrmv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZTRMV");
errors += clblast::RunTests<clblast::TestXtrmv<half>, half, half>(argc, argv, true, "HTRMV");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level2/xtrsv.cpp b/test/correctness/routines/level2/xtrsv.cpp
index 78e33807..b35d7fc7 100644
--- a/test/correctness/routines/level2/xtrsv.cpp
+++ b/test/correctness/routines/level2/xtrsv.cpp
@@ -12,17 +12,13 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level2/xtrsv.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
errors += clblast::RunTests<clblast::TestXtrsv<float>, float, float>(argc, argv, false, "STRSV");
errors += clblast::RunTests<clblast::TestXtrsv<double>, double, double>(argc, argv, true, "DTRSV");
- errors += clblast::RunTests<clblast::TestXtrsv<float2>, float2, float2>(argc, argv, true, "CTRSV");
- errors += clblast::RunTests<clblast::TestXtrsv<double2>, double2, double2>(argc, argv, true, "ZTRSV");
+ errors += clblast::RunTests<clblast::TestXtrsv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "CTRSV");
+ errors += clblast::RunTests<clblast::TestXtrsv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZTRSV");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level3/xgemm.cpp b/test/correctness/routines/level3/xgemm.cpp
index 54d41719..7fda5f2d 100644
--- a/test/correctness/routines/level3/xgemm.cpp
+++ b/test/correctness/routines/level3/xgemm.cpp
@@ -12,17 +12,13 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level3/xgemm.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
errors += clblast::RunTests<clblast::TestXgemm<float>, float, float>(argc, argv, false, "SGEMM");
errors += clblast::RunTests<clblast::TestXgemm<double>, double, double>(argc, argv, true, "DGEMM");
- errors += clblast::RunTests<clblast::TestXgemm<float2>, float2, float2>(argc, argv, true, "CGEMM");
- errors += clblast::RunTests<clblast::TestXgemm<double2>, double2, double2>(argc, argv, true, "ZGEMM");
+ errors += clblast::RunTests<clblast::TestXgemm<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "CGEMM");
+ errors += clblast::RunTests<clblast::TestXgemm<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZGEMM");
errors += clblast::RunTests<clblast::TestXgemm<half>, half, half>(argc, argv, true, "HGEMM");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level3/xhemm.cpp b/test/correctness/routines/level3/xhemm.cpp
index 76c970a7..cbd277e2 100644
--- a/test/correctness/routines/level3/xhemm.cpp
+++ b/test/correctness/routines/level3/xhemm.cpp
@@ -12,15 +12,11 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level3/xhemm.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
- errors += clblast::RunTests<clblast::TestXhemm<float2>, float2, float2>(argc, argv, false, "CHEMM");
- errors += clblast::RunTests<clblast::TestXhemm<double2>, double2, double2>(argc, argv, true, "ZHEMM");
+ errors += clblast::RunTests<clblast::TestXhemm<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, false, "CHEMM");
+ errors += clblast::RunTests<clblast::TestXhemm<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZHEMM");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level3/xher2k.cpp b/test/correctness/routines/level3/xher2k.cpp
index c653265e..e21a429c 100644
--- a/test/correctness/routines/level3/xher2k.cpp
+++ b/test/correctness/routines/level3/xher2k.cpp
@@ -12,15 +12,11 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level3/xher2k.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
- errors += clblast::RunTests<clblast::TestXher2k<float2,float>, float2, float>(argc, argv, false, "CHER2K");
- errors += clblast::RunTests<clblast::TestXher2k<double2,double>, double2, double>(argc, argv, true, "ZHER2K");
+ errors += clblast::RunTests<clblast::TestXher2k<clblast::float2,float>, clblast::float2, float>(argc, argv, false, "CHER2K");
+ errors += clblast::RunTests<clblast::TestXher2k<clblast::double2,double>, clblast::double2, double>(argc, argv, true, "ZHER2K");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level3/xherk.cpp b/test/correctness/routines/level3/xherk.cpp
index 09ea9e4d..5665147e 100644
--- a/test/correctness/routines/level3/xherk.cpp
+++ b/test/correctness/routines/level3/xherk.cpp
@@ -12,15 +12,11 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level3/xherk.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
- errors += clblast::RunTests<clblast::TestXherk<float2,float>, float2, float>(argc, argv, false, "CHERK");
- errors += clblast::RunTests<clblast::TestXherk<double2,double>, double2, double>(argc, argv, true, "ZHERK");
+ errors += clblast::RunTests<clblast::TestXherk<clblast::float2,float>, clblast::float2, float>(argc, argv, false, "CHERK");
+ errors += clblast::RunTests<clblast::TestXherk<clblast::double2,double>, clblast::double2, double>(argc, argv, true, "ZHERK");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level3/xsymm.cpp b/test/correctness/routines/level3/xsymm.cpp
index 3cb3515a..3e745d24 100644
--- a/test/correctness/routines/level3/xsymm.cpp
+++ b/test/correctness/routines/level3/xsymm.cpp
@@ -12,17 +12,13 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level3/xsymm.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
errors += clblast::RunTests<clblast::TestXsymm<float>, float, float>(argc, argv, false, "SSYMM");
errors += clblast::RunTests<clblast::TestXsymm<double>, double, double>(argc, argv, true, "DSYMM");
- errors += clblast::RunTests<clblast::TestXsymm<float2>, float2, float2>(argc, argv, true, "CSYMM");
- errors += clblast::RunTests<clblast::TestXsymm<double2>, double2, double2>(argc, argv, true, "ZSYMM");
+ errors += clblast::RunTests<clblast::TestXsymm<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "CSYMM");
+ errors += clblast::RunTests<clblast::TestXsymm<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZSYMM");
errors += clblast::RunTests<clblast::TestXsymm<half>, half, half>(argc, argv, true, "HSYMM");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level3/xsyr2k.cpp b/test/correctness/routines/level3/xsyr2k.cpp
index 617af04d..b3027063 100644
--- a/test/correctness/routines/level3/xsyr2k.cpp
+++ b/test/correctness/routines/level3/xsyr2k.cpp
@@ -12,17 +12,13 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level3/xsyr2k.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
errors += clblast::RunTests<clblast::TestXsyr2k<float>, float, float>(argc, argv, false, "SSYR2K");
errors += clblast::RunTests<clblast::TestXsyr2k<double>, double, double>(argc, argv, true, "DSYR2K");
- errors += clblast::RunTests<clblast::TestXsyr2k<float2>, float2, float2>(argc, argv, true, "CSYR2K");
- errors += clblast::RunTests<clblast::TestXsyr2k<double2>, double2, double2>(argc, argv, true, "ZSYR2K");
+ errors += clblast::RunTests<clblast::TestXsyr2k<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "CSYR2K");
+ errors += clblast::RunTests<clblast::TestXsyr2k<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZSYR2K");
errors += clblast::RunTests<clblast::TestXsyr2k<half>, half, half>(argc, argv, true, "HSYR2K");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level3/xsyrk.cpp b/test/correctness/routines/level3/xsyrk.cpp
index 2014b8d0..26c0db41 100644
--- a/test/correctness/routines/level3/xsyrk.cpp
+++ b/test/correctness/routines/level3/xsyrk.cpp
@@ -12,17 +12,13 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level3/xsyrk.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
errors += clblast::RunTests<clblast::TestXsyrk<float>, float, float>(argc, argv, false, "SSYRK");
errors += clblast::RunTests<clblast::TestXsyrk<double>, double, double>(argc, argv, true, "DSYRK");
- errors += clblast::RunTests<clblast::TestXsyrk<float2>, float2, float2>(argc, argv, true, "CSYRK");
- errors += clblast::RunTests<clblast::TestXsyrk<double2>, double2, double2>(argc, argv, true, "ZSYRK");
+ errors += clblast::RunTests<clblast::TestXsyrk<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "CSYRK");
+ errors += clblast::RunTests<clblast::TestXsyrk<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZSYRK");
errors += clblast::RunTests<clblast::TestXsyrk<half>, half, half>(argc, argv, true, "HSYRK");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level3/xtrmm.cpp b/test/correctness/routines/level3/xtrmm.cpp
index 32640d52..63d17ed5 100644
--- a/test/correctness/routines/level3/xtrmm.cpp
+++ b/test/correctness/routines/level3/xtrmm.cpp
@@ -12,17 +12,13 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level3/xtrmm.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
errors += clblast::RunTests<clblast::TestXtrmm<float>, float, float>(argc, argv, false, "STRMM");
errors += clblast::RunTests<clblast::TestXtrmm<double>, double, double>(argc, argv, true, "DTRMM");
- errors += clblast::RunTests<clblast::TestXtrmm<float2>, float2, float2>(argc, argv, true, "CTRMM");
- errors += clblast::RunTests<clblast::TestXtrmm<double2>, double2, double2>(argc, argv, true, "ZTRMM");
+ errors += clblast::RunTests<clblast::TestXtrmm<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "CTRMM");
+ errors += clblast::RunTests<clblast::TestXtrmm<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZTRMM");
errors += clblast::RunTests<clblast::TestXtrmm<half>, half, half>(argc, argv, true, "HTRMM");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level3/xtrsm.cpp b/test/correctness/routines/level3/xtrsm.cpp
index bc45a8bf..dcc20060 100644
--- a/test/correctness/routines/level3/xtrsm.cpp
+++ b/test/correctness/routines/level3/xtrsm.cpp
@@ -12,17 +12,13 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level3/xtrsm.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
errors += clblast::RunTests<clblast::TestXtrsm<float>, float, float>(argc, argv, false, "STRSM");
errors += clblast::RunTests<clblast::TestXtrsm<double>, double, double>(argc, argv, true, "DTRSM");
- errors += clblast::RunTests<clblast::TestXtrsm<float2>, float2, float2>(argc, argv, true, "CTRSM");
- errors += clblast::RunTests<clblast::TestXtrsm<double2>, double2, double2>(argc, argv, true, "ZTRSM");
+ errors += clblast::RunTests<clblast::TestXtrsm<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "CTRSM");
+ errors += clblast::RunTests<clblast::TestXtrsm<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZTRSM");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/levelx/xaxpybatched.cpp b/test/correctness/routines/levelx/xaxpybatched.cpp
index a106440f..3b906217 100644
--- a/test/correctness/routines/levelx/xaxpybatched.cpp
+++ b/test/correctness/routines/levelx/xaxpybatched.cpp
@@ -12,17 +12,13 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/levelx/xaxpybatched.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
errors += clblast::RunTests<clblast::TestXaxpyBatched<float>, float, float>(argc, argv, false, "SAXPYBATCHED");
errors += clblast::RunTests<clblast::TestXaxpyBatched<double>, double, double>(argc, argv, true, "DAXPYBATCHED");
- errors += clblast::RunTests<clblast::TestXaxpyBatched<float2>, float2, float2>(argc, argv, true, "CAXPYBATCHED");
- errors += clblast::RunTests<clblast::TestXaxpyBatched<double2>, double2, double2>(argc, argv, true, "ZAXPYBATCHED");
+ errors += clblast::RunTests<clblast::TestXaxpyBatched<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "CAXPYBATCHED");
+ errors += clblast::RunTests<clblast::TestXaxpyBatched<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZAXPYBATCHED");
errors += clblast::RunTests<clblast::TestXaxpyBatched<half>, half, half>(argc, argv, true, "HAXPYBATCHED");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/levelx/xgemmbatched.cpp b/test/correctness/routines/levelx/xgemmbatched.cpp
index 748e1bb7..1e931fd5 100644
--- a/test/correctness/routines/levelx/xgemmbatched.cpp
+++ b/test/correctness/routines/levelx/xgemmbatched.cpp
@@ -12,17 +12,13 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/levelx/xgemmbatched.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
errors += clblast::RunTests<clblast::TestXgemmBatched<float>, float, float>(argc, argv, false, "SGEMMBATCHED");
errors += clblast::RunTests<clblast::TestXgemmBatched<double>, double, double>(argc, argv, true, "DGEMMBATCHED");
- errors += clblast::RunTests<clblast::TestXgemmBatched<float2>, float2, float2>(argc, argv, true, "CGEMMBATCHED");
- errors += clblast::RunTests<clblast::TestXgemmBatched<double2>, double2, double2>(argc, argv, true, "ZGEMMBATCHED");
+ errors += clblast::RunTests<clblast::TestXgemmBatched<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "CGEMMBATCHED");
+ errors += clblast::RunTests<clblast::TestXgemmBatched<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZGEMMBATCHED");
errors += clblast::RunTests<clblast::TestXgemmBatched<half>, half, half>(argc, argv, true, "HGEMMBATCHED");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/levelx/xomatcopy.cpp b/test/correctness/routines/levelx/xomatcopy.cpp
index e034bc18..f512432b 100644
--- a/test/correctness/routines/levelx/xomatcopy.cpp
+++ b/test/correctness/routines/levelx/xomatcopy.cpp
@@ -12,17 +12,13 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/levelx/xomatcopy.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
errors += clblast::RunTests<clblast::TestXomatcopy<float>, float, float>(argc, argv, false, "SOMATCOPY");
errors += clblast::RunTests<clblast::TestXomatcopy<double>, double, double>(argc, argv, true, "DOMATCOPY");
- errors += clblast::RunTests<clblast::TestXomatcopy<float2>, float2, float2>(argc, argv, true, "COMATCOPY");
- errors += clblast::RunTests<clblast::TestXomatcopy<double2>, double2, double2>(argc, argv, true, "ZOMATCOPY");
+ errors += clblast::RunTests<clblast::TestXomatcopy<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "COMATCOPY");
+ errors += clblast::RunTests<clblast::TestXomatcopy<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZOMATCOPY");
errors += clblast::RunTests<clblast::TestXomatcopy<half>, half, half>(argc, argv, true, "HOMATCOPY");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/tester.cpp b/test/correctness/tester.cpp
index 40784fdb..d1f3cbb2 100644
--- a/test/correctness/tester.cpp
+++ b/test/correctness/tester.cpp
@@ -116,24 +116,44 @@ Tester<T,U>::Tester(const std::vector<std::string> &arguments, const bool silent
tests_failed_{0} {
options_ = options;
+ // Determines which reference is the default
+ #if defined(CLBLAST_REF_CBLAS)
+ auto default_cblas = 0;
+ #endif
+ #if defined(CLBLAST_REF_CLBLAS)
+ auto default_clblas = 0;
+ #endif
+ #if defined(CLBLAST_REF_CUBLAS)
+ auto default_cublas = 0;
+ #endif
+ #if defined(CLBLAST_REF_CBLAS)
+ default_cblas = 1;
+ #elif defined(CLBLAST_REF_CLBLAS)
+ default_clblas = 1;
+ #elif defined(CLBLAST_REF_CUBLAS)
+ default_cublas = 1;
+ #endif
+
// Determines which reference to test against
- #if defined(CLBLAST_REF_CLBLAS) && defined(CLBLAST_REF_CBLAS)
- compare_clblas_ = GetArgument(arguments, help_, kArgCompareclblas, 0);
- compare_cblas_ = GetArgument(arguments, help_, kArgComparecblas, 1);
- #elif CLBLAST_REF_CLBLAS
- compare_clblas_ = GetArgument(arguments, help_, kArgCompareclblas, 1);
- compare_cblas_ = 0;
- #elif CLBLAST_REF_CBLAS
- compare_clblas_ = 0;
- compare_cblas_ = GetArgument(arguments, help_, kArgComparecblas, 1);
- #else
- compare_clblas_ = 0;
- compare_cblas_ = 0;
+ compare_clblas_ = 0;
+ compare_cblas_ = 0;
+ compare_cublas_ = 0;
+ #if defined(CLBLAST_REF_CBLAS)
+ compare_cblas_ = GetArgument(arguments, help_, kArgComparecblas, default_cblas);
+ #endif
+ #if defined(CLBLAST_REF_CLBLAS)
+ compare_clblas_ = GetArgument(arguments, help_, kArgCompareclblas, default_clblas);
+ #endif
+ #if defined(CLBLAST_REF_CUBLAS)
+ compare_cublas_ = GetArgument(arguments, help_, kArgComparecublas, default_cublas);
#endif
// Prints the help message (command-line arguments)
if (!silent) { fprintf(stdout, "\n* %s\n", help_.c_str()); }
+ // Support for cuBLAS not available yet
+ if (compare_cublas_) { throw std::runtime_error("Cannot test against cuBLAS; not implemented yet"); }
+
// Can only test against a single reference (not two, not zero)
if (compare_clblas_ && compare_cblas_) {
throw std::runtime_error("Cannot test against both clBLAS and CBLAS references; choose one using the -cblas and -clblas arguments");
diff --git a/test/correctness/tester.hpp b/test/correctness/tester.hpp
index f60be04b..8cfa702f 100644
--- a/test/correctness/tester.hpp
+++ b/test/correctness/tester.hpp
@@ -113,6 +113,7 @@ class Tester {
// Testing against reference implementations
int compare_cblas_;
int compare_clblas_;
+ int compare_cublas_;
private:
diff --git a/test/performance/client.cpp b/test/performance/client.cpp
index 48d6708e..dc98ffbd 100644
--- a/test/performance/client.cpp
+++ b/test/performance/client.cpp
@@ -30,13 +30,14 @@ template <typename T, typename U> const int Client<T,U>::kSeed = 42; // fixed se
template <typename T, typename U>
Client<T,U>::Client(const Routine run_routine,
const Reference1 run_reference1, const Reference2 run_reference2,
- const std::vector<std::string> &options,
+ const Reference3 run_reference3, const std::vector<std::string> &options,
const std::vector<std::string> &buffers_in,
const std::vector<std::string> &buffers_out,
const GetMetric get_flops, const GetMetric get_bytes):
run_routine_(run_routine),
run_reference1_(run_reference1),
run_reference2_(run_reference2),
+ run_reference3_(run_reference3),
options_(options),
buffers_in_(buffers_in),
buffers_out_(buffers_out),
@@ -119,6 +120,11 @@ Arguments<U> Client<T,U>::ParseArguments(int argc, char *argv[], const size_t le
#else
args.compare_cblas = 0;
#endif
+ #ifdef CLBLAST_REF_CUBLAS
+ args.compare_cublas = GetArgument(command_line_args, help, kArgComparecublas, 1);
+ #else
+ args.compare_cublas = 0;
+ #endif
args.step = GetArgument(command_line_args, help, kArgStepSize, size_t{1});
args.num_steps = GetArgument(command_line_args, help, kArgNumSteps, size_t{0});
args.num_runs = GetArgument(command_line_args, help, kArgNumRuns, size_t{10});
@@ -133,24 +139,26 @@ Arguments<U> Client<T,U>::ParseArguments(int argc, char *argv[], const size_t le
// Comparison against a non-BLAS routine is not supported
if (level == 4) { // level-4 == level-X
- if (args.compare_clblas != 0 || args.compare_cblas != 0) {
+ if (args.compare_clblas != 0 || args.compare_cblas != 0 || args.compare_cublas != 0) {
if (!args.silent) {
- fprintf(stdout, "* Disabling clBLAS and CPU BLAS comparisons for this non-BLAS routine\n\n");
+ fprintf(stdout, "* Disabling clBLAS/CBLAS/cuBLAS comparisons for this non-BLAS routine\n\n");
}
}
args.compare_clblas = 0;
args.compare_cblas = 0;
+ args.compare_cublas = 0;
}
- // Comparison against clBLAS or a CPU BLAS library is not supported in case of half-precision
+ // Comparison against other BLAS libraries is not supported in case of half-precision
if (args.precision == Precision::kHalf) {
- if (args.compare_clblas != 0 || args.compare_cblas != 0) {
+ if (args.compare_clblas != 0 || args.compare_cblas != 0 || args.compare_cublas != 0) {
if (!args.silent) {
- fprintf(stdout, "* Disabling clBLAS and CPU BLAS comparisons for half-precision\n\n");
+ fprintf(stdout, "* Disabling clBLAS/CBLAS/cuBLAS comparisons for half-precision\n\n");
}
}
args.compare_clblas = 0;
args.compare_cblas = 0;
+ args.compare_cublas = 0;
}
// Returns the arguments
@@ -174,6 +182,9 @@ void Client<T,U>::PerformanceTest(Arguments<U> &args, const SetMetric set_sizes)
#ifdef CLBLAST_REF_CLBLAS
if (args.compare_clblas) { clblasSetup(); }
#endif
+ #ifdef CLBLAST_REF_CUBLAS
+ if (args.compare_cublas) { cublasSetup(args); }
+ #endif
// Iterates over all "num_step" values jumping by "step" each time
auto s = size_t{0};
@@ -232,6 +243,16 @@ void Client<T,U>::PerformanceTest(Arguments<U> &args, const SetMetric set_sizes)
HostToDevice(args, buffers, buffers_host, queue, buffers_out_);
timings.push_back(std::pair<std::string, double>("CPU BLAS", ms_cblas));
}
+ if (args.compare_cublas) {
+ auto buffers_host = BuffersHost<T>();
+ auto buffers_cuda = BuffersCUDA<T>();
+ DeviceToHost(args, buffers, buffers_host, queue, buffers_in_);
+ HostToCUDA(args, buffers_cuda, buffers_host, buffers_in_);
+ auto ms_cublas = TimedExecution(args.num_runs, args, buffers_cuda, queue, run_reference3_, "cuBLAS");
+ CUDAToHost(args, buffers_cuda, buffers_host, buffers_out_);
+ HostToDevice(args, buffers, buffers_host, queue, buffers_out_);
+ timings.push_back(std::pair<std::string, double>("cuBLAS", ms_cublas));
+ }
// Prints the performance of the tested libraries
PrintTableRow(args, timings);
@@ -251,6 +272,9 @@ void Client<T,U>::PerformanceTest(Arguments<U> &args, const SetMetric set_sizes)
#ifdef CLBLAST_REF_CLBLAS
if (args.compare_clblas) { clblasTeardown(); }
#endif
+ #ifdef CLBLAST_REF_CUBLAS
+ if (args.compare_cublas) { cublasTeardown(args); }
+ #endif
}
// =================================================================================================
@@ -307,6 +331,7 @@ void Client<T,U>::PrintTableHeader(const Arguments<U>& args) {
fprintf(stdout, " | <-- CLBlast -->");
if (args.compare_clblas) { fprintf(stdout, " | <-- clBLAS -->"); }
if (args.compare_cblas) { fprintf(stdout, " | <-- CPU BLAS -->"); }
+ if (args.compare_cublas) { fprintf(stdout, " | <-- cuBLAS -->"); }
fprintf(stdout, " |\n");
}
@@ -315,6 +340,7 @@ void Client<T,U>::PrintTableHeader(const Arguments<U>& args) {
fprintf(stdout, "%9s;%9s;%9s", "ms_1", "GFLOPS_1", "GBs_1");
if (args.compare_clblas) { fprintf(stdout, ";%9s;%9s;%9s", "ms_2", "GFLOPS_2", "GBs_2"); }
if (args.compare_cblas) { fprintf(stdout, ";%9s;%9s;%9s", "ms_3", "GFLOPS_3", "GBs_3"); }
+ if (args.compare_cublas) { fprintf(stdout, ";%9s;%9s;%9s", "ms_4", "GFLOPS_4", "GBs_4"); }
fprintf(stdout, "\n");
}
diff --git a/test/performance/client.hpp b/test/performance/client.hpp
index 12fd113d..47a13017 100644
--- a/test/performance/client.hpp
+++ b/test/performance/client.hpp
@@ -31,6 +31,7 @@
#ifdef CLBLAST_REF_CLBLAS
#include <clBLAS.h>
#endif
+#include "test/wrapper_cuda.hpp"
#include "clblast.h"
namespace clblast {
@@ -46,12 +47,13 @@ class Client {
using Routine = std::function<StatusCode(const Arguments<U>&, Buffers<T>&, Queue&)>;
using Reference1 = std::function<StatusCode(const Arguments<U>&, Buffers<T>&, Queue&)>;
using Reference2 = std::function<StatusCode(const Arguments<U>&, BuffersHost<T>&, Queue&)>;
+ using Reference3 = std::function<StatusCode(const Arguments<U>&, BuffersCUDA<T>&, Queue&)>;
using SetMetric = std::function<void(Arguments<U>&)>;
using GetMetric = std::function<size_t(const Arguments<U>&)>;
// The constructor
Client(const Routine run_routine, const Reference1 run_reference1, const Reference2 run_reference2,
- const std::vector<std::string> &options,
+ const Reference3 run_reference3, const std::vector<std::string> &options,
const std::vector<std::string> &buffers_in, const std::vector<std::string> &buffers_out,
const GetMetric get_flops, const GetMetric get_bytes);
@@ -84,6 +86,7 @@ class Client {
const Routine run_routine_;
const Reference1 run_reference1_;
const Reference2 run_reference2_;
+ const Reference3 run_reference3_;
const std::vector<std::string> options_;
const std::vector<std::string> buffers_in_;
const std::vector<std::string> buffers_out_;
@@ -118,9 +121,14 @@ void RunClient(int argc, char *argv[]) {
#else
auto reference2 = ReferenceNotAvailable<T,U,BuffersHost<T>>;
#endif
+ #ifdef CLBLAST_REF_CUBLAS
+ auto reference3 = C::RunReference3; // cuBLAS when available
+ #else
+ auto reference3 = ReferenceNotAvailable<T,U,BuffersCUDA<T>>;
+ #endif
// Creates a new client
- auto client = Client<T,U>(C::RunRoutine, reference1, reference2, C::GetOptions(),
+ auto client = Client<T,U>(C::RunRoutine, reference1, reference2, reference3, C::GetOptions(),
C::BuffersIn(), C::BuffersOut(), C::GetFlops, C::GetBytes);
// Simple command line argument parser with defaults
diff --git a/test/performance/routines/level1/xamax.cpp b/test/performance/routines/level1/xamax.cpp
index 5dc7b3d9..5cbef604 100644
--- a/test/performance/routines/level1/xamax.cpp
+++ b/test/performance/routines/level1/xamax.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level1/xamax.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -27,9 +23,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kDouble:
clblast::RunClient<clblast::TestXamax<double>, double, double>(argc, argv); break;
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXamax<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXamax<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXamax<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXamax<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level1/xasum.cpp b/test/performance/routines/level1/xasum.cpp
index bf5b2fa9..7fccb678 100644
--- a/test/performance/routines/level1/xasum.cpp
+++ b/test/performance/routines/level1/xasum.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level1/xasum.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -27,9 +23,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kDouble:
clblast::RunClient<clblast::TestXasum<double>, double, double>(argc, argv); break;
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXasum<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXasum<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXasum<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXasum<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level1/xaxpy.cpp b/test/performance/routines/level1/xaxpy.cpp
index faccc089..739408bb 100644
--- a/test/performance/routines/level1/xaxpy.cpp
+++ b/test/performance/routines/level1/xaxpy.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level1/xaxpy.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -27,9 +23,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kDouble:
clblast::RunClient<clblast::TestXaxpy<double>, double, double>(argc, argv); break;
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXaxpy<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXaxpy<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXaxpy<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXaxpy<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level1/xcopy.cpp b/test/performance/routines/level1/xcopy.cpp
index 8aa536af..902c394f 100644
--- a/test/performance/routines/level1/xcopy.cpp
+++ b/test/performance/routines/level1/xcopy.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level1/xcopy.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -27,9 +23,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kDouble:
clblast::RunClient<clblast::TestXcopy<double>, double, double>(argc, argv); break;
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXcopy<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXcopy<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXcopy<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXcopy<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level1/xdot.cpp b/test/performance/routines/level1/xdot.cpp
index 9a570e1e..b2d4d969 100644
--- a/test/performance/routines/level1/xdot.cpp
+++ b/test/performance/routines/level1/xdot.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level1/xdot.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
diff --git a/test/performance/routines/level1/xdotc.cpp b/test/performance/routines/level1/xdotc.cpp
index 426b81ae..308bcdab 100644
--- a/test/performance/routines/level1/xdotc.cpp
+++ b/test/performance/routines/level1/xdotc.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level1/xdotc.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -24,9 +20,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXdotc<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXdotc<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXdotc<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXdotc<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level1/xdotu.cpp b/test/performance/routines/level1/xdotu.cpp
index 4fbe167d..fc54a8a5 100644
--- a/test/performance/routines/level1/xdotu.cpp
+++ b/test/performance/routines/level1/xdotu.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level1/xdotu.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -24,9 +20,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXdotu<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXdotu<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXdotu<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXdotu<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level1/xnrm2.cpp b/test/performance/routines/level1/xnrm2.cpp
index 6a1cdcc7..769335eb 100644
--- a/test/performance/routines/level1/xnrm2.cpp
+++ b/test/performance/routines/level1/xnrm2.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level1/xnrm2.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -27,9 +23,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kDouble:
clblast::RunClient<clblast::TestXnrm2<double>, double, double>(argc, argv); break;
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXnrm2<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXnrm2<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXnrm2<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXnrm2<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level1/xrot.cpp b/test/performance/routines/level1/xrot.cpp
index 2b94ca39..f010e04a 100644
--- a/test/performance/routines/level1/xrot.cpp
+++ b/test/performance/routines/level1/xrot.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level1/xrot.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
diff --git a/test/performance/routines/level1/xrotg.cpp b/test/performance/routines/level1/xrotg.cpp
index ee6fc44b..4c8d33cf 100644
--- a/test/performance/routines/level1/xrotg.cpp
+++ b/test/performance/routines/level1/xrotg.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level1/xrotg.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
diff --git a/test/performance/routines/level1/xrotm.cpp b/test/performance/routines/level1/xrotm.cpp
index e8d73311..bc2111b3 100644
--- a/test/performance/routines/level1/xrotm.cpp
+++ b/test/performance/routines/level1/xrotm.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level1/xrotm.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
diff --git a/test/performance/routines/level1/xrotmg.cpp b/test/performance/routines/level1/xrotmg.cpp
index a5266b14..fb568243 100644
--- a/test/performance/routines/level1/xrotmg.cpp
+++ b/test/performance/routines/level1/xrotmg.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level1/xrotmg.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
diff --git a/test/performance/routines/level1/xscal.cpp b/test/performance/routines/level1/xscal.cpp
index 6fefc5d0..b9db60cf 100644
--- a/test/performance/routines/level1/xscal.cpp
+++ b/test/performance/routines/level1/xscal.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level1/xscal.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -27,9 +23,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kDouble:
clblast::RunClient<clblast::TestXscal<double>, double, double>(argc, argv); break;
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXscal<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXscal<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXscal<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXscal<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level1/xswap.cpp b/test/performance/routines/level1/xswap.cpp
index b728b8f4..db40f6e4 100644
--- a/test/performance/routines/level1/xswap.cpp
+++ b/test/performance/routines/level1/xswap.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level1/xswap.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -27,9 +23,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kDouble:
clblast::RunClient<clblast::TestXswap<double>, double, double>(argc, argv); break;
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXswap<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXswap<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXswap<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXswap<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level2/xgbmv.cpp b/test/performance/routines/level2/xgbmv.cpp
index 6a4b01f8..23a91503 100644
--- a/test/performance/routines/level2/xgbmv.cpp
+++ b/test/performance/routines/level2/xgbmv.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level2/xgbmv.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -27,9 +23,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kDouble:
clblast::RunClient<clblast::TestXgbmv<double>, double, double>(argc, argv); break;
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXgbmv<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXgbmv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXgbmv<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXgbmv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level2/xgemv.cpp b/test/performance/routines/level2/xgemv.cpp
index 335d5ef1..3bb14b68 100644
--- a/test/performance/routines/level2/xgemv.cpp
+++ b/test/performance/routines/level2/xgemv.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level2/xgemv.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -27,9 +23,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kDouble:
clblast::RunClient<clblast::TestXgemv<double>, double, double>(argc, argv); break;
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXgemv<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXgemv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXgemv<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXgemv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level2/xger.cpp b/test/performance/routines/level2/xger.cpp
index 50fdb9e6..ca23b8f0 100644
--- a/test/performance/routines/level2/xger.cpp
+++ b/test/performance/routines/level2/xger.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level2/xger.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
diff --git a/test/performance/routines/level2/xgerc.cpp b/test/performance/routines/level2/xgerc.cpp
index 67c72285..0423cdd5 100644
--- a/test/performance/routines/level2/xgerc.cpp
+++ b/test/performance/routines/level2/xgerc.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level2/xgerc.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -24,9 +20,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXgerc<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXgerc<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXgerc<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXgerc<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level2/xgeru.cpp b/test/performance/routines/level2/xgeru.cpp
index 6e845bb8..c0fbb2d5 100644
--- a/test/performance/routines/level2/xgeru.cpp
+++ b/test/performance/routines/level2/xgeru.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level2/xgeru.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -24,9 +20,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXgeru<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXgeru<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXgeru<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXgeru<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level2/xhbmv.cpp b/test/performance/routines/level2/xhbmv.cpp
index 600317c1..d59cba26 100644
--- a/test/performance/routines/level2/xhbmv.cpp
+++ b/test/performance/routines/level2/xhbmv.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level2/xhbmv.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -24,9 +20,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXhbmv<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXhbmv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXhbmv<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXhbmv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level2/xhemv.cpp b/test/performance/routines/level2/xhemv.cpp
index 7700cf7b..1664b6cd 100644
--- a/test/performance/routines/level2/xhemv.cpp
+++ b/test/performance/routines/level2/xhemv.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level2/xhemv.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -24,9 +20,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXhemv<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXhemv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXhemv<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXhemv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level2/xher.cpp b/test/performance/routines/level2/xher.cpp
index e7276aee..434f486c 100644
--- a/test/performance/routines/level2/xher.cpp
+++ b/test/performance/routines/level2/xher.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level2/xher.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -24,9 +20,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXher<float2,float>, float2, float>(argc, argv); break;
+ clblast::RunClient<clblast::TestXher<clblast::float2,float>, clblast::float2, float>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXher<double2,double>, double2, double>(argc, argv); break;
+ clblast::RunClient<clblast::TestXher<clblast::double2,double>, clblast::double2, double>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level2/xher2.cpp b/test/performance/routines/level2/xher2.cpp
index b4c53206..cce40a9e 100644
--- a/test/performance/routines/level2/xher2.cpp
+++ b/test/performance/routines/level2/xher2.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level2/xher2.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -24,9 +20,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXher2<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXher2<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXher2<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXher2<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level2/xhpmv.cpp b/test/performance/routines/level2/xhpmv.cpp
index d9683d2e..d88791fe 100644
--- a/test/performance/routines/level2/xhpmv.cpp
+++ b/test/performance/routines/level2/xhpmv.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level2/xhpmv.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -24,9 +20,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXhpmv<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXhpmv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXhpmv<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXhpmv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level2/xhpr.cpp b/test/performance/routines/level2/xhpr.cpp
index c4ffaf81..a92a3134 100644
--- a/test/performance/routines/level2/xhpr.cpp
+++ b/test/performance/routines/level2/xhpr.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level2/xhpr.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -24,9 +20,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXhpr<float2,float>, float2, float>(argc, argv); break;
+ clblast::RunClient<clblast::TestXhpr<clblast::float2,float>, clblast::float2, float>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXhpr<double2,double>, double2, double>(argc, argv); break;
+ clblast::RunClient<clblast::TestXhpr<clblast::double2,double>, clblast::double2, double>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level2/xhpr2.cpp b/test/performance/routines/level2/xhpr2.cpp
index 3e5d4004..f34de29b 100644
--- a/test/performance/routines/level2/xhpr2.cpp
+++ b/test/performance/routines/level2/xhpr2.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level2/xhpr2.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -24,9 +20,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXhpr2<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXhpr2<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXhpr2<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXhpr2<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level2/xsbmv.cpp b/test/performance/routines/level2/xsbmv.cpp
index 9c0ab3b6..59bbf40c 100644
--- a/test/performance/routines/level2/xsbmv.cpp
+++ b/test/performance/routines/level2/xsbmv.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level2/xsbmv.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
diff --git a/test/performance/routines/level2/xspmv.cpp b/test/performance/routines/level2/xspmv.cpp
index 6cc4e3ba..9ba29f43 100644
--- a/test/performance/routines/level2/xspmv.cpp
+++ b/test/performance/routines/level2/xspmv.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level2/xspmv.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
diff --git a/test/performance/routines/level2/xspr.cpp b/test/performance/routines/level2/xspr.cpp
index dc45ba6d..57551f5d 100644
--- a/test/performance/routines/level2/xspr.cpp
+++ b/test/performance/routines/level2/xspr.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level2/xspr.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
diff --git a/test/performance/routines/level2/xspr2.cpp b/test/performance/routines/level2/xspr2.cpp
index 3c9a769f..573fb652 100644
--- a/test/performance/routines/level2/xspr2.cpp
+++ b/test/performance/routines/level2/xspr2.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level2/xspr2.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
diff --git a/test/performance/routines/level2/xsymv.cpp b/test/performance/routines/level2/xsymv.cpp
index aaa98c8b..25933d8d 100644
--- a/test/performance/routines/level2/xsymv.cpp
+++ b/test/performance/routines/level2/xsymv.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level2/xsymv.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
diff --git a/test/performance/routines/level2/xsyr.cpp b/test/performance/routines/level2/xsyr.cpp
index d710bf63..3b54510d 100644
--- a/test/performance/routines/level2/xsyr.cpp
+++ b/test/performance/routines/level2/xsyr.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level2/xsyr.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
diff --git a/test/performance/routines/level2/xsyr2.cpp b/test/performance/routines/level2/xsyr2.cpp
index 39b46b6a..ab9641c2 100644
--- a/test/performance/routines/level2/xsyr2.cpp
+++ b/test/performance/routines/level2/xsyr2.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level2/xsyr2.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
diff --git a/test/performance/routines/level2/xtbmv.cpp b/test/performance/routines/level2/xtbmv.cpp
index 5fb3ea14..319f9c80 100644
--- a/test/performance/routines/level2/xtbmv.cpp
+++ b/test/performance/routines/level2/xtbmv.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level2/xtbmv.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -27,9 +23,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kDouble:
clblast::RunClient<clblast::TestXtbmv<double>, double, double>(argc, argv); break;
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXtbmv<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXtbmv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXtbmv<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXtbmv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level2/xtbsv.cpp b/test/performance/routines/level2/xtbsv.cpp
index 7b88917c..4d37e76d 100644
--- a/test/performance/routines/level2/xtbsv.cpp
+++ b/test/performance/routines/level2/xtbsv.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level2/xtbsv.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -26,9 +22,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kDouble:
clblast::RunClient<clblast::TestXtbsv<double>, double, double>(argc, argv); break;
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXtbsv<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXtbsv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXtbsv<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXtbsv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level2/xtpmv.cpp b/test/performance/routines/level2/xtpmv.cpp
index 907749a7..c2db51b1 100644
--- a/test/performance/routines/level2/xtpmv.cpp
+++ b/test/performance/routines/level2/xtpmv.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level2/xtpmv.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -27,9 +23,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kDouble:
clblast::RunClient<clblast::TestXtpmv<double>, double, double>(argc, argv); break;
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXtpmv<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXtpmv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXtpmv<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXtpmv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level2/xtpsv.cpp b/test/performance/routines/level2/xtpsv.cpp
index 0dab8ff6..b01a9f05 100644
--- a/test/performance/routines/level2/xtpsv.cpp
+++ b/test/performance/routines/level2/xtpsv.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level2/xtpsv.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -26,9 +22,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kDouble:
clblast::RunClient<clblast::TestXtpsv<double>, double, double>(argc, argv); break;
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXtpsv<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXtpsv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXtpsv<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXtpsv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level2/xtrmv.cpp b/test/performance/routines/level2/xtrmv.cpp
index c2c6f232..610a5052 100644
--- a/test/performance/routines/level2/xtrmv.cpp
+++ b/test/performance/routines/level2/xtrmv.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level2/xtrmv.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -27,9 +23,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kDouble:
clblast::RunClient<clblast::TestXtrmv<double>, double, double>(argc, argv); break;
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXtrmv<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXtrmv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXtrmv<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXtrmv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level2/xtrsv.cpp b/test/performance/routines/level2/xtrsv.cpp
index 49e477f7..02255e71 100644
--- a/test/performance/routines/level2/xtrsv.cpp
+++ b/test/performance/routines/level2/xtrsv.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level2/xtrsv.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -26,9 +22,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kDouble:
clblast::RunClient<clblast::TestXtrsv<double>, double, double>(argc, argv); break;
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXtrsv<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXtrsv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXtrsv<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXtrsv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level3/xgemm.cpp b/test/performance/routines/level3/xgemm.cpp
index deb2493f..602e1a20 100644
--- a/test/performance/routines/level3/xgemm.cpp
+++ b/test/performance/routines/level3/xgemm.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level3/xgemm.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -27,9 +23,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kDouble:
clblast::RunClient<clblast::TestXgemm<double>, double, double>(argc, argv); break;
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXgemm<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXgemm<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXgemm<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXgemm<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level3/xhemm.cpp b/test/performance/routines/level3/xhemm.cpp
index 975c672f..6c3687a9 100644
--- a/test/performance/routines/level3/xhemm.cpp
+++ b/test/performance/routines/level3/xhemm.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level3/xhemm.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -24,9 +20,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXhemm<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXhemm<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXhemm<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXhemm<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level3/xher2k.cpp b/test/performance/routines/level3/xher2k.cpp
index d579d4f9..9d3385f7 100644
--- a/test/performance/routines/level3/xher2k.cpp
+++ b/test/performance/routines/level3/xher2k.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level3/xher2k.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -24,9 +20,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXher2k<float2,float>, float2, float>(argc, argv); break;
+ clblast::RunClient<clblast::TestXher2k<clblast::float2,float>, clblast::float2, float>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXher2k<double2,double>, double2, double>(argc, argv); break;
+ clblast::RunClient<clblast::TestXher2k<clblast::double2,double>, clblast::double2, double>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level3/xherk.cpp b/test/performance/routines/level3/xherk.cpp
index 94411e5a..ae6e774e 100644
--- a/test/performance/routines/level3/xherk.cpp
+++ b/test/performance/routines/level3/xherk.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level3/xherk.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -24,9 +20,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXherk<float2,float>, float2, float>(argc, argv); break;
+ clblast::RunClient<clblast::TestXherk<clblast::float2,float>, clblast::float2, float>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXherk<double2,double>, double2, double>(argc, argv); break;
+ clblast::RunClient<clblast::TestXherk<clblast::double2,double>, clblast::double2, double>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level3/xsymm.cpp b/test/performance/routines/level3/xsymm.cpp
index 04ae8eb0..ba3b6ab2 100644
--- a/test/performance/routines/level3/xsymm.cpp
+++ b/test/performance/routines/level3/xsymm.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level3/xsymm.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -27,9 +23,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kDouble:
clblast::RunClient<clblast::TestXsymm<double>, double, double>(argc, argv); break;
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXsymm<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXsymm<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXsymm<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXsymm<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level3/xsyr2k.cpp b/test/performance/routines/level3/xsyr2k.cpp
index 7b8b6f4f..150a4192 100644
--- a/test/performance/routines/level3/xsyr2k.cpp
+++ b/test/performance/routines/level3/xsyr2k.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level3/xsyr2k.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -27,9 +23,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kDouble:
clblast::RunClient<clblast::TestXsyr2k<double>, double, double>(argc, argv); break;
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXsyr2k<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXsyr2k<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXsyr2k<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXsyr2k<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level3/xsyrk.cpp b/test/performance/routines/level3/xsyrk.cpp
index ea0fc33b..00cef52b 100644
--- a/test/performance/routines/level3/xsyrk.cpp
+++ b/test/performance/routines/level3/xsyrk.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level3/xsyrk.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -27,9 +23,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kDouble:
clblast::RunClient<clblast::TestXsyrk<double>, double, double>(argc, argv); break;
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXsyrk<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXsyrk<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXsyrk<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXsyrk<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level3/xtrmm.cpp b/test/performance/routines/level3/xtrmm.cpp
index 7a29e111..fb54a410 100644
--- a/test/performance/routines/level3/xtrmm.cpp
+++ b/test/performance/routines/level3/xtrmm.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level3/xtrmm.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -27,9 +23,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kDouble:
clblast::RunClient<clblast::TestXtrmm<double>, double, double>(argc, argv); break;
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXtrmm<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXtrmm<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXtrmm<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXtrmm<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level3/xtrsm.cpp b/test/performance/routines/level3/xtrsm.cpp
index 342274b7..f44265f2 100644
--- a/test/performance/routines/level3/xtrsm.cpp
+++ b/test/performance/routines/level3/xtrsm.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level3/xtrsm.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -26,9 +22,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kDouble:
clblast::RunClient<clblast::TestXtrsm<double>, double, double>(argc, argv); break;
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXtrsm<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXtrsm<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXtrsm<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXtrsm<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/levelx/xaxpybatched.cpp b/test/performance/routines/levelx/xaxpybatched.cpp
index 6d3bcb51..7c09cd5b 100644
--- a/test/performance/routines/levelx/xaxpybatched.cpp
+++ b/test/performance/routines/levelx/xaxpybatched.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/levelx/xaxpybatched.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -27,9 +23,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kDouble:
clblast::RunClient<clblast::TestXaxpyBatched<double>, double, double>(argc, argv); break;
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXaxpyBatched<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXaxpyBatched<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXaxpyBatched<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXaxpyBatched<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/levelx/xgemmbatched.cpp b/test/performance/routines/levelx/xgemmbatched.cpp
index c9477fad..f4c860d8 100644
--- a/test/performance/routines/levelx/xgemmbatched.cpp
+++ b/test/performance/routines/levelx/xgemmbatched.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/levelx/xgemmbatched.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -27,9 +23,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kDouble:
clblast::RunClient<clblast::TestXgemmBatched<double>, double, double>(argc, argv); break;
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXgemmBatched<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXgemmBatched<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXgemmBatched<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXgemmBatched<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/levelx/xomatcopy.cpp b/test/performance/routines/levelx/xomatcopy.cpp
index 5821c3b8..568f22e6 100644
--- a/test/performance/routines/levelx/xomatcopy.cpp
+++ b/test/performance/routines/levelx/xomatcopy.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/levelx/xomatcopy.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -27,9 +23,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kDouble:
clblast::RunClient<clblast::TestXomatcopy<double>, double, double>(argc, argv); break;
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXomatcopy<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXomatcopy<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXomatcopy<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXomatcopy<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/routines/common.hpp b/test/routines/common.hpp
new file mode 100644
index 00000000..9708288a
--- /dev/null
+++ b/test/routines/common.hpp
@@ -0,0 +1,36 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file contains all the common includes for the clients and tests
+//
+// =================================================================================================
+
+#ifndef CLBLAST_TEST_ROUTINES_COMMON_H_
+#define CLBLAST_TEST_ROUTINES_COMMON_H_
+
+#include <vector>
+#include <string>
+
+#include "utilities/utilities.hpp"
+
+#ifdef CLBLAST_REF_CLBLAS
+ #include "test/wrapper_clblas.hpp"
+#endif
+#ifdef CLBLAST_REF_CBLAS
+ #include "test/wrapper_cblas.hpp"
+#endif
+#include "test/wrapper_cuda.hpp"
+#ifdef CLBLAST_REF_CUBLAS
+ #include "test/wrapper_cublas.hpp"
+#endif
+
+// =================================================================================================
+
+// CLBLAST_TEST_ROUTINES_COMMON_H_
+#endif
diff --git a/test/routines/level1/xamax.hpp b/test/routines/level1/xamax.hpp
index 2e844f2c..04bdaa3d 100644
--- a/test/routines/level1/xamax.hpp
+++ b/test/routines/level1/xamax.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XAMAX_H_
#define CLBLAST_TEST_ROUTINES_XAMAX_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -111,6 +103,16 @@ class TestXamax {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+ auto status = cublasXamax(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.n,
+ buffers.scalar, args.imax_offset,
+ buffers.x_vec, args.x_offset, args.x_inc);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ }
+ #endif
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.scalar_size, static_cast<T>(0));
diff --git a/test/routines/level1/xasum.hpp b/test/routines/level1/xasum.hpp
index 8488bfc6..6add9c64 100644
--- a/test/routines/level1/xasum.hpp
+++ b/test/routines/level1/xasum.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XASUM_H_
#define CLBLAST_TEST_ROUTINES_XASUM_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -111,6 +103,16 @@ class TestXasum {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+ auto status = cublasXasum(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.n,
+ buffers.scalar, args.asum_offset,
+ buffers.x_vec, args.x_offset, args.x_inc);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ }
+ #endif
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.scalar_size, static_cast<T>(0));
diff --git a/test/routines/level1/xaxpy.hpp b/test/routines/level1/xaxpy.hpp
index cc7d251a..17cae6ad 100644
--- a/test/routines/level1/xaxpy.hpp
+++ b/test/routines/level1/xaxpy.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XAXPY_H_
#define CLBLAST_TEST_ROUTINES_XAXPY_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -112,6 +104,16 @@ class TestXaxpy {
}
#endif
+ // Describes how to run the cuBLAS reference via cublasXaxpy (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+ auto status = cublasXaxpy(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.n, args.alpha,
+ buffers.x_vec, args.x_offset, args.x_inc,
+ buffers.y_vec, args.y_offset, args.y_inc);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ } // any status other than CUBLAS_STATUS_SUCCESS becomes kUnknownError; the Queue argument is unused
+ #endif // CLBLAST_REF_CUBLAS
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.y_size, static_cast<T>(0));
diff --git a/test/routines/level1/xcopy.hpp b/test/routines/level1/xcopy.hpp
index 0dbf0f3d..7a5c99b8 100644
--- a/test/routines/level1/xcopy.hpp
+++ b/test/routines/level1/xcopy.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XCOPY_H_
#define CLBLAST_TEST_ROUTINES_XCOPY_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -111,6 +103,16 @@ class TestXcopy {
}
#endif
+ // Describes how to run the cuBLAS reference via cublasXcopy (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+ auto status = cublasXcopy(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.n,
+ buffers.x_vec, args.x_offset, args.x_inc,
+ buffers.y_vec, args.y_offset, args.y_inc);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ } // any status other than CUBLAS_STATUS_SUCCESS becomes kUnknownError; the Queue argument is unused
+ #endif // CLBLAST_REF_CUBLAS
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.y_size, static_cast<T>(0));
diff --git a/test/routines/level1/xdot.hpp b/test/routines/level1/xdot.hpp
index bdf2e721..1ea25994 100644
--- a/test/routines/level1/xdot.hpp
+++ b/test/routines/level1/xdot.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XDOT_H_
#define CLBLAST_TEST_ROUTINES_XDOT_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -118,6 +110,17 @@ class TestXdot {
}
#endif
+ // Describes how to run the cuBLAS reference via cublasXdot (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+ auto status = cublasXdot(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.n,
+ buffers.scalar, args.dot_offset,
+ buffers.x_vec, args.x_offset, args.x_inc,
+ buffers.y_vec, args.y_offset, args.y_inc);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ } // any status other than CUBLAS_STATUS_SUCCESS becomes kUnknownError; the Queue argument is unused
+ #endif // CLBLAST_REF_CUBLAS
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.scalar_size, static_cast<T>(0));
diff --git a/test/routines/level1/xdotc.hpp b/test/routines/level1/xdotc.hpp
index 2cc71b93..c800c1f5 100644
--- a/test/routines/level1/xdotc.hpp
+++ b/test/routines/level1/xdotc.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XDOTC_H_
#define CLBLAST_TEST_ROUTINES_XDOTC_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -118,6 +110,17 @@ class TestXdotc {
}
#endif
+ // Describes how to run the cuBLAS reference via cublasXdotc (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+ auto status = cublasXdotc(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.n,
+ buffers.scalar, args.dot_offset,
+ buffers.x_vec, args.x_offset, args.x_inc,
+ buffers.y_vec, args.y_offset, args.y_inc);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ } // any status other than CUBLAS_STATUS_SUCCESS becomes kUnknownError; the Queue argument is unused
+ #endif // CLBLAST_REF_CUBLAS
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.scalar_size, static_cast<T>(0));
diff --git a/test/routines/level1/xdotu.hpp b/test/routines/level1/xdotu.hpp
index 272e1e31..3545a3a6 100644
--- a/test/routines/level1/xdotu.hpp
+++ b/test/routines/level1/xdotu.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XDOTU_H_
#define CLBLAST_TEST_ROUTINES_XDOTU_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -118,6 +110,17 @@ class TestXdotu {
}
#endif
+ // Describes how to run the cuBLAS reference via cublasXdotu (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+ auto status = cublasXdotu(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.n,
+ buffers.scalar, args.dot_offset,
+ buffers.x_vec, args.x_offset, args.x_inc,
+ buffers.y_vec, args.y_offset, args.y_inc);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ } // any status other than CUBLAS_STATUS_SUCCESS becomes kUnknownError; the Queue argument is unused
+ #endif // CLBLAST_REF_CUBLAS
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.scalar_size, static_cast<T>(0));
diff --git a/test/routines/level1/xnrm2.hpp b/test/routines/level1/xnrm2.hpp
index cb1ec683..1db70537 100644
--- a/test/routines/level1/xnrm2.hpp
+++ b/test/routines/level1/xnrm2.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XNRM2_H_
#define CLBLAST_TEST_ROUTINES_XNRM2_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -111,6 +103,16 @@ class TestXnrm2 {
}
#endif
+ // Describes how to run the cuBLAS reference via cublasXnrm2 (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+ auto status = cublasXnrm2(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.n,
+ buffers.scalar, args.nrm2_offset,
+ buffers.x_vec, args.x_offset, args.x_inc);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ } // any status other than CUBLAS_STATUS_SUCCESS becomes kUnknownError; the Queue argument is unused
+ #endif // CLBLAST_REF_CUBLAS
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.scalar_size, static_cast<T>(0));
diff --git a/test/routines/level1/xscal.hpp b/test/routines/level1/xscal.hpp
index 3e6b9a38..efa0988d 100644
--- a/test/routines/level1/xscal.hpp
+++ b/test/routines/level1/xscal.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XSCAL_H_
#define CLBLAST_TEST_ROUTINES_XSCAL_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -105,6 +97,15 @@ class TestXscal {
}
#endif
+ // Describes how to run the cuBLAS reference via cublasXscal (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+ auto status = cublasXscal(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.n, args.alpha,
+ buffers.x_vec, args.x_offset, args.x_inc);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ } // any status other than CUBLAS_STATUS_SUCCESS becomes kUnknownError; the Queue argument is unused
+ #endif // CLBLAST_REF_CUBLAS
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.x_size, static_cast<T>(0));
diff --git a/test/routines/level1/xswap.hpp b/test/routines/level1/xswap.hpp
index d9b84dc4..d778cc23 100644
--- a/test/routines/level1/xswap.hpp
+++ b/test/routines/level1/xswap.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XSWAP_H_
#define CLBLAST_TEST_ROUTINES_XSWAP_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -111,6 +103,16 @@ class TestXswap {
}
#endif
+ // Describes how to run the cuBLAS reference via cublasXswap (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+ auto status = cublasXswap(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.n,
+ buffers.x_vec, args.x_offset, args.x_inc,
+ buffers.y_vec, args.y_offset, args.y_inc);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ } // any status other than CUBLAS_STATUS_SUCCESS becomes kUnknownError; the Queue argument is unused
+ #endif // CLBLAST_REF_CUBLAS
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.x_size + args.y_size, static_cast<T>(0));
diff --git a/test/routines/level2/xgbmv.hpp b/test/routines/level2/xgbmv.hpp
index 990ef49f..23138c77 100644
--- a/test/routines/level2/xgbmv.hpp
+++ b/test/routines/level2/xgbmv.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XGBMV_H_
#define CLBLAST_TEST_ROUTINES_XGBMV_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -131,6 +123,19 @@ class TestXgbmv {
}
#endif
+ // Describes how to run the cuBLAS reference via cublasXgbmv (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+ auto status = cublasXgbmv(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.layout,
+ convertToCUBLAS(args.a_transpose),
+ args.m, args.n, args.kl, args.ku, args.alpha,
+ buffers.a_mat, args.a_offset, args.a_ld,
+ buffers.x_vec, args.x_offset, args.x_inc, args.beta,
+ buffers.y_vec, args.y_offset, args.y_inc);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ } // any status other than CUBLAS_STATUS_SUCCESS becomes kUnknownError; the Queue argument is unused
+ #endif // CLBLAST_REF_CUBLAS
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.y_size, static_cast<T>(0));
diff --git a/test/routines/level2/xgemv.hpp b/test/routines/level2/xgemv.hpp
index a007cb62..0ee53b80 100644
--- a/test/routines/level2/xgemv.hpp
+++ b/test/routines/level2/xgemv.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XGEMV_H_
#define CLBLAST_TEST_ROUTINES_XGEMV_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -131,6 +123,19 @@ class TestXgemv {
}
#endif
+ // Describes how to run the cuBLAS reference via cublasXgemv (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+ auto status = cublasXgemv(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.layout,
+ convertToCUBLAS(args.a_transpose),
+ args.m, args.n, args.alpha,
+ buffers.a_mat, args.a_offset, args.a_ld,
+ buffers.x_vec, args.x_offset, args.x_inc, args.beta,
+ buffers.y_vec, args.y_offset, args.y_inc);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ } // any status other than CUBLAS_STATUS_SUCCESS becomes kUnknownError; the Queue argument is unused
+ #endif // CLBLAST_REF_CUBLAS
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.y_size, static_cast<T>(0));
diff --git a/test/routines/level2/xger.hpp b/test/routines/level2/xger.hpp
index 5c131e2d..92a1a2ae 100644
--- a/test/routines/level2/xger.hpp
+++ b/test/routines/level2/xger.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XGER_H_
#define CLBLAST_TEST_ROUTINES_XGER_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -125,6 +117,18 @@ class TestXger {
}
#endif
+ // Describes how to run the cuBLAS reference via cublasXger (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+ auto status = cublasXger(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.layout,
+ args.m, args.n, args.alpha,
+ buffers.x_vec, args.x_offset, args.x_inc,
+ buffers.y_vec, args.y_offset, args.y_inc,
+ buffers.a_mat, args.a_offset, args.a_ld);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ } // any status other than CUBLAS_STATUS_SUCCESS becomes kUnknownError; the Queue argument is unused
+ #endif // CLBLAST_REF_CUBLAS
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.a_size, static_cast<T>(0));
diff --git a/test/routines/level2/xgerc.hpp b/test/routines/level2/xgerc.hpp
index e3544424..5d899398 100644
--- a/test/routines/level2/xgerc.hpp
+++ b/test/routines/level2/xgerc.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XGERC_H_
#define CLBLAST_TEST_ROUTINES_XGERC_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -125,6 +117,18 @@ class TestXgerc {
}
#endif
+ // Describes how to run the cuBLAS reference via cublasXgerc (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+ auto status = cublasXgerc(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.layout,
+ args.m, args.n, args.alpha,
+ buffers.x_vec, args.x_offset, args.x_inc,
+ buffers.y_vec, args.y_offset, args.y_inc,
+ buffers.a_mat, args.a_offset, args.a_ld);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ } // any status other than CUBLAS_STATUS_SUCCESS becomes kUnknownError; the Queue argument is unused
+ #endif // CLBLAST_REF_CUBLAS
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.a_size, static_cast<T>(0));
diff --git a/test/routines/level2/xgeru.hpp b/test/routines/level2/xgeru.hpp
index 1d81e292..96dab22e 100644
--- a/test/routines/level2/xgeru.hpp
+++ b/test/routines/level2/xgeru.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XGERU_H_
#define CLBLAST_TEST_ROUTINES_XGERU_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -125,6 +117,18 @@ class TestXgeru {
}
#endif
+ // Describes how to run the cuBLAS reference via cublasXgeru (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+ auto status = cublasXgeru(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.layout,
+ args.m, args.n, args.alpha,
+ buffers.x_vec, args.x_offset, args.x_inc,
+ buffers.y_vec, args.y_offset, args.y_inc,
+ buffers.a_mat, args.a_offset, args.a_ld);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ } // any status other than CUBLAS_STATUS_SUCCESS becomes kUnknownError; the Queue argument is unused
+ #endif // CLBLAST_REF_CUBLAS
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.a_size, static_cast<T>(0));
diff --git a/test/routines/level2/xhbmv.hpp b/test/routines/level2/xhbmv.hpp
index 21194fd6..b6844744 100644
--- a/test/routines/level2/xhbmv.hpp
+++ b/test/routines/level2/xhbmv.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XHBMV_H_
#define CLBLAST_TEST_ROUTINES_XHBMV_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -125,6 +117,19 @@ class TestXhbmv {
}
#endif
+ // Describes how to run the cuBLAS reference via cublasXhbmv (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+ auto status = cublasXhbmv(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.layout,
+ convertToCUBLAS(args.triangle),
+ args.n, args.kl, args.alpha, // NOTE(review): args.kl presumably serves as the band count k here - confirm
+ buffers.a_mat, args.a_offset, args.a_ld,
+ buffers.x_vec, args.x_offset, args.x_inc, args.beta,
+ buffers.y_vec, args.y_offset, args.y_inc);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ } // any status other than CUBLAS_STATUS_SUCCESS becomes kUnknownError; the Queue argument is unused
+ #endif // CLBLAST_REF_CUBLAS
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.y_size, static_cast<T>(0));
diff --git a/test/routines/level2/xhemv.hpp b/test/routines/level2/xhemv.hpp
index ffef8ff8..e1f23592 100644
--- a/test/routines/level2/xhemv.hpp
+++ b/test/routines/level2/xhemv.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XHEMV_H_
#define CLBLAST_TEST_ROUTINES_XHEMV_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -125,6 +117,19 @@ class TestXhemv {
}
#endif
+ // Describes how to run the cuBLAS reference via cublasXhemv (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+ auto status = cublasXhemv(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.layout,
+ convertToCUBLAS(args.triangle),
+ args.n, args.alpha,
+ buffers.a_mat, args.a_offset, args.a_ld,
+ buffers.x_vec, args.x_offset, args.x_inc, args.beta,
+ buffers.y_vec, args.y_offset, args.y_inc);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ } // any status other than CUBLAS_STATUS_SUCCESS becomes kUnknownError; the Queue argument is unused
+ #endif // CLBLAST_REF_CUBLAS
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.y_size, static_cast<T>(0));
diff --git a/test/routines/level2/xher.hpp b/test/routines/level2/xher.hpp
index 083bd3fc..1ac1247b 100644
--- a/test/routines/level2/xher.hpp
+++ b/test/routines/level2/xher.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XHER_H_
#define CLBLAST_TEST_ROUTINES_XHER_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -118,6 +110,18 @@ class TestXher {
}
#endif
+ // Describes how to run the cuBLAS reference via cublasXher (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<U> &args, BuffersCUDA<T> &buffers, Queue &) { // args use U, buffers use T
+ auto status = cublasXher(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.layout,
+ convertToCUBLAS(args.triangle),
+ args.n, args.alpha,
+ buffers.x_vec, args.x_offset, args.x_inc,
+ buffers.a_mat, args.a_offset, args.a_ld);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ } // any status other than CUBLAS_STATUS_SUCCESS becomes kUnknownError; the Queue argument is unused
+ #endif // CLBLAST_REF_CUBLAS
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<U> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.a_size, static_cast<T>(0));
diff --git a/test/routines/level2/xher2.hpp b/test/routines/level2/xher2.hpp
index 7bd890a5..18ccc1ac 100644
--- a/test/routines/level2/xher2.hpp
+++ b/test/routines/level2/xher2.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XHER2_H_
#define CLBLAST_TEST_ROUTINES_XHER2_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -125,6 +117,19 @@ class TestXher2 {
}
#endif
+ // Describes how to run the cuBLAS reference via cublasXher2 (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+ auto status = cublasXher2(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.layout,
+ convertToCUBLAS(args.triangle),
+ args.n, args.alpha,
+ buffers.x_vec, args.x_offset, args.x_inc,
+ buffers.y_vec, args.y_offset, args.y_inc,
+ buffers.a_mat, args.a_offset, args.a_ld);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ } // any status other than CUBLAS_STATUS_SUCCESS becomes kUnknownError; the Queue argument is unused
+ #endif // CLBLAST_REF_CUBLAS
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.a_size, static_cast<T>(0));
diff --git a/test/routines/level2/xhpmv.hpp b/test/routines/level2/xhpmv.hpp
index 285dd6d3..ad91fe15 100644
--- a/test/routines/level2/xhpmv.hpp
+++ b/test/routines/level2/xhpmv.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XHPMV_H_
#define CLBLAST_TEST_ROUTINES_XHPMV_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -125,6 +117,19 @@ class TestXhpmv {
}
#endif
+ // Describes how to run the cuBLAS reference via cublasXhpmv (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+ auto status = cublasXhpmv(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.layout,
+ convertToCUBLAS(args.triangle),
+ args.n, args.alpha,
+ buffers.ap_mat, args.ap_offset,
+ buffers.x_vec, args.x_offset, args.x_inc, args.beta,
+ buffers.y_vec, args.y_offset, args.y_inc);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ } // any status other than CUBLAS_STATUS_SUCCESS becomes kUnknownError; the Queue argument is unused
+ #endif // CLBLAST_REF_CUBLAS
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.y_size, static_cast<T>(0));
diff --git a/test/routines/level2/xhpr.hpp b/test/routines/level2/xhpr.hpp
index 88bae86b..f9d580cd 100644
--- a/test/routines/level2/xhpr.hpp
+++ b/test/routines/level2/xhpr.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XHPR_H_
#define CLBLAST_TEST_ROUTINES_XHPR_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -118,6 +110,18 @@ class TestXhpr {
}
#endif
+ // Describes how to run the cuBLAS reference via cublasXhpr (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<U> &args, BuffersCUDA<T> &buffers, Queue &) { // args use U, buffers use T
+ auto status = cublasXhpr(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.layout,
+ convertToCUBLAS(args.triangle),
+ args.n, args.alpha,
+ buffers.x_vec, args.x_offset, args.x_inc,
+ buffers.ap_mat, args.ap_offset);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ } // any status other than CUBLAS_STATUS_SUCCESS becomes kUnknownError; the Queue argument is unused
+ #endif // CLBLAST_REF_CUBLAS
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<U> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.ap_size, static_cast<T>(0));
diff --git a/test/routines/level2/xhpr2.hpp b/test/routines/level2/xhpr2.hpp
index cd10fa00..f946ba5c 100644
--- a/test/routines/level2/xhpr2.hpp
+++ b/test/routines/level2/xhpr2.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XHPR2_H_
#define CLBLAST_TEST_ROUTINES_XHPR2_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -125,6 +117,19 @@ class TestXhpr2 {
}
#endif
+ // Describes how to run the cuBLAS reference via cublasXhpr2 (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+ auto status = cublasXhpr2(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.layout,
+ convertToCUBLAS(args.triangle),
+ args.n, args.alpha,
+ buffers.x_vec, args.x_offset, args.x_inc,
+ buffers.y_vec, args.y_offset, args.y_inc,
+ buffers.ap_mat, args.ap_offset);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ } // any status other than CUBLAS_STATUS_SUCCESS becomes kUnknownError; the Queue argument is unused
+ #endif // CLBLAST_REF_CUBLAS
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.ap_size, static_cast<T>(0));
diff --git a/test/routines/level2/xsbmv.hpp b/test/routines/level2/xsbmv.hpp
index 5c70aba5..6481d19b 100644
--- a/test/routines/level2/xsbmv.hpp
+++ b/test/routines/level2/xsbmv.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XSBMV_H_
#define CLBLAST_TEST_ROUTINES_XSBMV_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -125,6 +117,19 @@ class TestXsbmv {
}
#endif
+ // Describes how to run the cuBLAS reference via cublasXsbmv (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+ auto status = cublasXsbmv(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.layout,
+ convertToCUBLAS(args.triangle),
+ args.n, args.kl, args.alpha, // NOTE(review): args.kl presumably serves as the band count k here - confirm
+ buffers.a_mat, args.a_offset, args.a_ld,
+ buffers.x_vec, args.x_offset, args.x_inc, args.beta,
+ buffers.y_vec, args.y_offset, args.y_inc);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ } // any status other than CUBLAS_STATUS_SUCCESS becomes kUnknownError; the Queue argument is unused
+ #endif // CLBLAST_REF_CUBLAS
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.y_size, static_cast<T>(0));
diff --git a/test/routines/level2/xspmv.hpp b/test/routines/level2/xspmv.hpp
index 560f5baa..9815dbee 100644
--- a/test/routines/level2/xspmv.hpp
+++ b/test/routines/level2/xspmv.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XSPMV_H_
#define CLBLAST_TEST_ROUTINES_XSPMV_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -125,6 +117,19 @@ class TestXspmv {
}
#endif
+ // Describes how to run the cuBLAS reference via cublasXspmv (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+ auto status = cublasXspmv(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.layout,
+ convertToCUBLAS(args.triangle),
+ args.n, args.alpha,
+ buffers.ap_mat, args.ap_offset,
+ buffers.x_vec, args.x_offset, args.x_inc, args.beta,
+ buffers.y_vec, args.y_offset, args.y_inc);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ } // any status other than CUBLAS_STATUS_SUCCESS becomes kUnknownError; the Queue argument is unused
+ #endif // CLBLAST_REF_CUBLAS
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.y_size, static_cast<T>(0));
diff --git a/test/routines/level2/xspr.hpp b/test/routines/level2/xspr.hpp
index 2e12db33..01a50c38 100644
--- a/test/routines/level2/xspr.hpp
+++ b/test/routines/level2/xspr.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XSPR_H_
#define CLBLAST_TEST_ROUTINES_XSPR_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -118,6 +110,18 @@ class TestXspr {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+   // Invokes cublasXspr using the handle stored in args and the CUDA buffers. Any cuBLAS
+   // status other than CUBLAS_STATUS_SUCCESS is reported as kUnknownError; the queue is unused.
+   const auto handle = reinterpret_cast<cublasHandle_t>(args.cublas_handle);
+   const auto cublas_status = cublasXspr(handle, args.layout,
+                                         convertToCUBLAS(args.triangle),
+                                         args.n, args.alpha,
+                                         buffers.x_vec, args.x_offset, args.x_inc,
+                                         buffers.ap_mat, args.ap_offset);
+   return (cublas_status == CUBLAS_STATUS_SUCCESS) ? StatusCode::kSuccess : StatusCode::kUnknownError;
+ }
+ #endif
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.ap_size, static_cast<T>(0));
diff --git a/test/routines/level2/xspr2.hpp b/test/routines/level2/xspr2.hpp
index a7e22227..55f8a141 100644
--- a/test/routines/level2/xspr2.hpp
+++ b/test/routines/level2/xspr2.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XSPR2_H_
#define CLBLAST_TEST_ROUTINES_XSPR2_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -125,6 +117,19 @@ class TestXspr2 {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+   // Invokes cublasXspr2 using the handle stored in args and the CUDA buffers. Any cuBLAS
+   // status other than CUBLAS_STATUS_SUCCESS is reported as kUnknownError; the queue is unused.
+   const auto handle = reinterpret_cast<cublasHandle_t>(args.cublas_handle);
+   const auto cublas_status = cublasXspr2(handle, args.layout,
+                                          convertToCUBLAS(args.triangle),
+                                          args.n, args.alpha,
+                                          buffers.x_vec, args.x_offset, args.x_inc,
+                                          buffers.y_vec, args.y_offset, args.y_inc,
+                                          buffers.ap_mat, args.ap_offset);
+   return (cublas_status == CUBLAS_STATUS_SUCCESS) ? StatusCode::kSuccess : StatusCode::kUnknownError;
+ }
+ #endif
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.ap_size, static_cast<T>(0));
diff --git a/test/routines/level2/xsymv.hpp b/test/routines/level2/xsymv.hpp
index d9cf9c1e..aec0dfb0 100644
--- a/test/routines/level2/xsymv.hpp
+++ b/test/routines/level2/xsymv.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XSYMV_H_
#define CLBLAST_TEST_ROUTINES_XSYMV_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -125,6 +117,19 @@ class TestXsymv {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+   // Invokes cublasXsymv using the handle stored in args and the CUDA buffers. Any cuBLAS
+   // status other than CUBLAS_STATUS_SUCCESS is reported as kUnknownError; the queue is unused.
+   const auto handle = reinterpret_cast<cublasHandle_t>(args.cublas_handle);
+   const auto cublas_status = cublasXsymv(handle, args.layout,
+                                          convertToCUBLAS(args.triangle),
+                                          args.n, args.alpha,
+                                          buffers.a_mat, args.a_offset, args.a_ld,
+                                          buffers.x_vec, args.x_offset, args.x_inc, args.beta,
+                                          buffers.y_vec, args.y_offset, args.y_inc);
+   return (cublas_status == CUBLAS_STATUS_SUCCESS) ? StatusCode::kSuccess : StatusCode::kUnknownError;
+ }
+ #endif
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.y_size, static_cast<T>(0));
diff --git a/test/routines/level2/xsyr.hpp b/test/routines/level2/xsyr.hpp
index b60c3a36..78b686d8 100644
--- a/test/routines/level2/xsyr.hpp
+++ b/test/routines/level2/xsyr.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XSYR_H_
#define CLBLAST_TEST_ROUTINES_XSYR_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -118,6 +110,18 @@ class TestXsyr {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+   // Invokes cublasXsyr using the handle stored in args and the CUDA buffers. Any cuBLAS
+   // status other than CUBLAS_STATUS_SUCCESS is reported as kUnknownError; the queue is unused.
+   const auto handle = reinterpret_cast<cublasHandle_t>(args.cublas_handle);
+   const auto cublas_status = cublasXsyr(handle, args.layout,
+                                         convertToCUBLAS(args.triangle),
+                                         args.n, args.alpha,
+                                         buffers.x_vec, args.x_offset, args.x_inc,
+                                         buffers.a_mat, args.a_offset, args.a_ld);
+   return (cublas_status == CUBLAS_STATUS_SUCCESS) ? StatusCode::kSuccess : StatusCode::kUnknownError;
+ }
+ #endif
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.a_size, static_cast<T>(0));
diff --git a/test/routines/level2/xsyr2.hpp b/test/routines/level2/xsyr2.hpp
index dd10a3d0..38aa4f43 100644
--- a/test/routines/level2/xsyr2.hpp
+++ b/test/routines/level2/xsyr2.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XSYR2_H_
#define CLBLAST_TEST_ROUTINES_XSYR2_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -125,6 +117,19 @@ class TestXsyr2 {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+   // Invokes cublasXsyr2 using the handle stored in args and the CUDA buffers. Any cuBLAS
+   // status other than CUBLAS_STATUS_SUCCESS is reported as kUnknownError; the queue is unused.
+   const auto handle = reinterpret_cast<cublasHandle_t>(args.cublas_handle);
+   const auto cublas_status = cublasXsyr2(handle, args.layout,
+                                          convertToCUBLAS(args.triangle),
+                                          args.n, args.alpha,
+                                          buffers.x_vec, args.x_offset, args.x_inc,
+                                          buffers.y_vec, args.y_offset, args.y_inc,
+                                          buffers.a_mat, args.a_offset, args.a_ld);
+   return (cublas_status == CUBLAS_STATUS_SUCCESS) ? StatusCode::kSuccess : StatusCode::kUnknownError;
+ }
+ #endif
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.a_size, static_cast<T>(0));
diff --git a/test/routines/level2/xtbmv.hpp b/test/routines/level2/xtbmv.hpp
index 7eb8ce9e..8c7aa381 100644
--- a/test/routines/level2/xtbmv.hpp
+++ b/test/routines/level2/xtbmv.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XTBMV_H_
#define CLBLAST_TEST_ROUTINES_XTBMV_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -121,6 +113,20 @@ class TestXtbmv {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+   // Invokes cublasXtbmv using the handle stored in args and the CUDA buffers. Any cuBLAS
+   // status other than CUBLAS_STATUS_SUCCESS is reported as kUnknownError; the queue is unused.
+   const auto handle = reinterpret_cast<cublasHandle_t>(args.cublas_handle);
+   const auto cublas_status = cublasXtbmv(handle, args.layout,
+                                          convertToCUBLAS(args.triangle),
+                                          convertToCUBLAS(args.a_transpose),
+                                          convertToCUBLAS(args.diagonal),
+                                          args.n, args.kl,
+                                          buffers.a_mat, args.a_offset, args.a_ld,
+                                          buffers.x_vec, args.x_offset, args.x_inc);
+   return (cublas_status == CUBLAS_STATUS_SUCCESS) ? StatusCode::kSuccess : StatusCode::kUnknownError;
+ }
+ #endif
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.x_size, static_cast<T>(0));
diff --git a/test/routines/level2/xtpmv.hpp b/test/routines/level2/xtpmv.hpp
index 7f4842f0..3afab978 100644
--- a/test/routines/level2/xtpmv.hpp
+++ b/test/routines/level2/xtpmv.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XTPMV_H_
#define CLBLAST_TEST_ROUTINES_XTPMV_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -121,6 +113,20 @@ class TestXtpmv {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+   // Invokes cublasXtpmv using the handle stored in args and the CUDA buffers. Any cuBLAS
+   // status other than CUBLAS_STATUS_SUCCESS is reported as kUnknownError; the queue is unused.
+   const auto handle = reinterpret_cast<cublasHandle_t>(args.cublas_handle);
+   const auto cublas_status = cublasXtpmv(handle, args.layout,
+                                          convertToCUBLAS(args.triangle),
+                                          convertToCUBLAS(args.a_transpose),
+                                          convertToCUBLAS(args.diagonal),
+                                          args.n,
+                                          buffers.ap_mat, args.ap_offset,
+                                          buffers.x_vec, args.x_offset, args.x_inc);
+   return (cublas_status == CUBLAS_STATUS_SUCCESS) ? StatusCode::kSuccess : StatusCode::kUnknownError;
+ }
+ #endif
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.x_size, static_cast<T>(0));
diff --git a/test/routines/level2/xtrmv.hpp b/test/routines/level2/xtrmv.hpp
index cb7527ed..2b71f151 100644
--- a/test/routines/level2/xtrmv.hpp
+++ b/test/routines/level2/xtrmv.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XTRMV_H_
#define CLBLAST_TEST_ROUTINES_XTRMV_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -121,6 +113,20 @@ class TestXtrmv {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+   // Invokes cublasXtrmv using the handle stored in args and the CUDA buffers. Any cuBLAS
+   // status other than CUBLAS_STATUS_SUCCESS is reported as kUnknownError; the queue is unused.
+   const auto handle = reinterpret_cast<cublasHandle_t>(args.cublas_handle);
+   const auto cublas_status = cublasXtrmv(handle, args.layout,
+                                          convertToCUBLAS(args.triangle),
+                                          convertToCUBLAS(args.a_transpose),
+                                          convertToCUBLAS(args.diagonal),
+                                          args.n,
+                                          buffers.a_mat, args.a_offset, args.a_ld,
+                                          buffers.x_vec, args.x_offset, args.x_inc);
+   return (cublas_status == CUBLAS_STATUS_SUCCESS) ? StatusCode::kSuccess : StatusCode::kUnknownError;
+ }
+ #endif
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.x_size, static_cast<T>(0));
diff --git a/test/routines/level2/xtrsv.hpp b/test/routines/level2/xtrsv.hpp
index 63d34758..85b50e85 100644
--- a/test/routines/level2/xtrsv.hpp
+++ b/test/routines/level2/xtrsv.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XTRSV_H_
#define CLBLAST_TEST_ROUTINES_XTRSV_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -136,6 +128,20 @@ class TestXtrsv {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+   // Invokes cublasXtrsv using the handle stored in args and the CUDA buffers. Any cuBLAS
+   // status other than CUBLAS_STATUS_SUCCESS is reported as kUnknownError; the queue is unused.
+   const auto handle = reinterpret_cast<cublasHandle_t>(args.cublas_handle);
+   const auto cublas_status = cublasXtrsv(handle, args.layout,
+                                          convertToCUBLAS(args.triangle),
+                                          convertToCUBLAS(args.a_transpose),
+                                          convertToCUBLAS(args.diagonal),
+                                          args.n,
+                                          buffers.a_mat, args.a_offset, args.a_ld,
+                                          buffers.x_vec, args.x_offset, args.x_inc);
+   return (cublas_status == CUBLAS_STATUS_SUCCESS) ? StatusCode::kSuccess : StatusCode::kUnknownError;
+ }
+ #endif
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.x_size, static_cast<T>(0));
diff --git a/test/routines/level3/xgemm.hpp b/test/routines/level3/xgemm.hpp
index a33cbfec..7e0ead6d 100644
--- a/test/routines/level3/xgemm.hpp
+++ b/test/routines/level3/xgemm.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XGEMM_H_
#define CLBLAST_TEST_ROUTINES_XGEMM_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -135,6 +127,20 @@ class TestXgemm {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+   // Invokes cublasXgemm using the handle stored in args and the CUDA buffers. Any cuBLAS
+   // status other than CUBLAS_STATUS_SUCCESS is reported as kUnknownError; the queue is unused.
+   const auto handle = reinterpret_cast<cublasHandle_t>(args.cublas_handle);
+   const auto cublas_status = cublasXgemm(handle, args.layout,
+                                          convertToCUBLAS(args.a_transpose),
+                                          convertToCUBLAS(args.b_transpose),
+                                          args.m, args.n, args.k, args.alpha,
+                                          buffers.a_mat, args.a_offset, args.a_ld,
+                                          buffers.b_mat, args.b_offset, args.b_ld, args.beta,
+                                          buffers.c_mat, args.c_offset, args.c_ld);
+   return (cublas_status == CUBLAS_STATUS_SUCCESS) ? StatusCode::kSuccess : StatusCode::kUnknownError;
+ }
+ #endif
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.c_size, static_cast<T>(0));
diff --git a/test/routines/level3/xhemm.hpp b/test/routines/level3/xhemm.hpp
index 74029c7e..a89617b5 100644
--- a/test/routines/level3/xhemm.hpp
+++ b/test/routines/level3/xhemm.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XHEMM_H_
#define CLBLAST_TEST_ROUTINES_XHEMM_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -135,6 +127,20 @@ class TestXhemm {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+   // Invokes cublasXhemm using the handle stored in args and the CUDA buffers. Any cuBLAS
+   // status other than CUBLAS_STATUS_SUCCESS is reported as kUnknownError; the queue is unused.
+   const auto handle = reinterpret_cast<cublasHandle_t>(args.cublas_handle);
+   const auto cublas_status = cublasXhemm(handle, args.layout,
+                                          convertToCUBLAS(args.side),
+                                          convertToCUBLAS(args.triangle),
+                                          args.m, args.n, args.alpha,
+                                          buffers.a_mat, args.a_offset, args.a_ld,
+                                          buffers.b_mat, args.b_offset, args.b_ld, args.beta,
+                                          buffers.c_mat, args.c_offset, args.c_ld);
+   return (cublas_status == CUBLAS_STATUS_SUCCESS) ? StatusCode::kSuccess : StatusCode::kUnknownError;
+ }
+ #endif
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.c_size, static_cast<T>(0));
diff --git a/test/routines/level3/xher2k.hpp b/test/routines/level3/xher2k.hpp
index ea13bbc1..55e6d894 100644
--- a/test/routines/level3/xher2k.hpp
+++ b/test/routines/level3/xher2k.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XHER2K_H_
#define CLBLAST_TEST_ROUTINES_XHER2K_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -136,6 +128,21 @@ class TestXher2k {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<U> &args, BuffersCUDA<T> &buffers, Queue &) {
+   // Invokes cublasXher2k using the handle stored in args and the CUDA buffers. Any cuBLAS
+   // status other than CUBLAS_STATUS_SUCCESS is reported as kUnknownError; the queue is unused.
+   const auto handle = reinterpret_cast<cublasHandle_t>(args.cublas_handle);
+   // The wrapper receives alpha as a T built from args.alpha passed twice; beta is forwarded as-is.
+   const T alpha2{args.alpha, args.alpha};
+   const auto cublas_status = cublasXher2k(handle, args.layout,
+                                           convertToCUBLAS(args.triangle),
+                                           convertToCUBLAS(args.a_transpose),
+                                           args.n, args.k, alpha2,
+                                           buffers.a_mat, args.a_offset, args.a_ld,
+                                           buffers.b_mat, args.b_offset, args.b_ld, args.beta,
+                                           buffers.c_mat, args.c_offset, args.c_ld);
+   return (cublas_status == CUBLAS_STATUS_SUCCESS) ? StatusCode::kSuccess : StatusCode::kUnknownError;
+ }
+ #endif
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<U> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.c_size, static_cast<T>(0));
diff --git a/test/routines/level3/xherk.hpp b/test/routines/level3/xherk.hpp
index b1ce83e0..3e1e7e02 100644
--- a/test/routines/level3/xherk.hpp
+++ b/test/routines/level3/xherk.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XHERK_H_
#define CLBLAST_TEST_ROUTINES_XHERK_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -123,6 +115,19 @@ class TestXherk {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<U> &args, BuffersCUDA<T> &buffers, Queue &) {
+   // Invokes cublasXherk using the handle stored in args and the CUDA buffers. Any cuBLAS
+   // status other than CUBLAS_STATUS_SUCCESS is reported as kUnknownError; the queue is unused.
+   const auto handle = reinterpret_cast<cublasHandle_t>(args.cublas_handle);
+   const auto cublas_status = cublasXherk(handle, args.layout,
+                                          convertToCUBLAS(args.triangle),
+                                          convertToCUBLAS(args.a_transpose),
+                                          args.n, args.k, args.alpha,
+                                          buffers.a_mat, args.a_offset, args.a_ld, args.beta,
+                                          buffers.c_mat, args.c_offset, args.c_ld);
+   return (cublas_status == CUBLAS_STATUS_SUCCESS) ? StatusCode::kSuccess : StatusCode::kUnknownError;
+ }
+ #endif
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<U> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.c_size, static_cast<T>(0));
diff --git a/test/routines/level3/xsymm.hpp b/test/routines/level3/xsymm.hpp
index 6ab644b8..5d840d40 100644
--- a/test/routines/level3/xsymm.hpp
+++ b/test/routines/level3/xsymm.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XSYMM_H_
#define CLBLAST_TEST_ROUTINES_XSYMM_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -135,6 +127,20 @@ class TestXsymm {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+   // Invokes cublasXsymm using the handle stored in args and the CUDA buffers. Any cuBLAS
+   // status other than CUBLAS_STATUS_SUCCESS is reported as kUnknownError; the queue is unused.
+   const auto handle = reinterpret_cast<cublasHandle_t>(args.cublas_handle);
+   const auto cublas_status = cublasXsymm(handle, args.layout,
+                                          convertToCUBLAS(args.side),
+                                          convertToCUBLAS(args.triangle),
+                                          args.m, args.n, args.alpha,
+                                          buffers.a_mat, args.a_offset, args.a_ld,
+                                          buffers.b_mat, args.b_offset, args.b_ld, args.beta,
+                                          buffers.c_mat, args.c_offset, args.c_ld);
+   return (cublas_status == CUBLAS_STATUS_SUCCESS) ? StatusCode::kSuccess : StatusCode::kUnknownError;
+ }
+ #endif
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.c_size, static_cast<T>(0));
diff --git a/test/routines/level3/xsyr2k.hpp b/test/routines/level3/xsyr2k.hpp
index 1400c4e2..4a4a2f10 100644
--- a/test/routines/level3/xsyr2k.hpp
+++ b/test/routines/level3/xsyr2k.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XSYR2K_H_
#define CLBLAST_TEST_ROUTINES_XSYR2K_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -133,6 +125,20 @@ class TestXsyr2k {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+   // Invokes cublasXsyr2k using the handle stored in args and the CUDA buffers. Any cuBLAS
+   // status other than CUBLAS_STATUS_SUCCESS is reported as kUnknownError; the queue is unused.
+   const auto handle = reinterpret_cast<cublasHandle_t>(args.cublas_handle);
+   const auto cublas_status = cublasXsyr2k(handle, args.layout,
+                                           convertToCUBLAS(args.triangle),
+                                           convertToCUBLAS(args.a_transpose),
+                                           args.n, args.k, args.alpha,
+                                           buffers.a_mat, args.a_offset, args.a_ld,
+                                           buffers.b_mat, args.b_offset, args.b_ld, args.beta,
+                                           buffers.c_mat, args.c_offset, args.c_ld);
+   return (cublas_status == CUBLAS_STATUS_SUCCESS) ? StatusCode::kSuccess : StatusCode::kUnknownError;
+ }
+ #endif
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.c_size, static_cast<T>(0));
diff --git a/test/routines/level3/xsyrk.hpp b/test/routines/level3/xsyrk.hpp
index 2df8d6b0..90e46727 100644
--- a/test/routines/level3/xsyrk.hpp
+++ b/test/routines/level3/xsyrk.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XSYRK_H_
#define CLBLAST_TEST_ROUTINES_XSYRK_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -123,6 +115,19 @@ class TestXsyrk {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+   // Invokes cublasXsyrk using the handle stored in args and the CUDA buffers. Any cuBLAS
+   // status other than CUBLAS_STATUS_SUCCESS is reported as kUnknownError; the queue is unused.
+   const auto handle = reinterpret_cast<cublasHandle_t>(args.cublas_handle);
+   const auto cublas_status = cublasXsyrk(handle, args.layout,
+                                          convertToCUBLAS(args.triangle),
+                                          convertToCUBLAS(args.a_transpose),
+                                          args.n, args.k, args.alpha,
+                                          buffers.a_mat, args.a_offset, args.a_ld, args.beta,
+                                          buffers.c_mat, args.c_offset, args.c_ld);
+   return (cublas_status == CUBLAS_STATUS_SUCCESS) ? StatusCode::kSuccess : StatusCode::kUnknownError;
+ }
+ #endif
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.c_size, static_cast<T>(0));
diff --git a/test/routines/level3/xtrmm.hpp b/test/routines/level3/xtrmm.hpp
index 84adc6e0..acc00e01 100644
--- a/test/routines/level3/xtrmm.hpp
+++ b/test/routines/level3/xtrmm.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XTRMM_H_
#define CLBLAST_TEST_ROUTINES_XTRMM_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -127,6 +119,21 @@ class TestXtrmm {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+   // Invokes cublasXtrmm using the handle stored in args and the CUDA buffers. Any cuBLAS
+   // status other than CUBLAS_STATUS_SUCCESS is reported as kUnknownError; the queue is unused.
+   const auto handle = reinterpret_cast<cublasHandle_t>(args.cublas_handle);
+   const auto cublas_status = cublasXtrmm(handle, args.layout,
+                                          convertToCUBLAS(args.side),
+                                          convertToCUBLAS(args.triangle),
+                                          convertToCUBLAS(args.a_transpose),
+                                          convertToCUBLAS(args.diagonal),
+                                          args.m, args.n, args.alpha,
+                                          buffers.a_mat, args.a_offset, args.a_ld,
+                                          buffers.b_mat, args.b_offset, args.b_ld);
+   return (cublas_status == CUBLAS_STATUS_SUCCESS) ? StatusCode::kSuccess : StatusCode::kUnknownError;
+ }
+ #endif
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.b_size, static_cast<T>(0));
diff --git a/test/routines/level3/xtrsm.hpp b/test/routines/level3/xtrsm.hpp
index de5b307d..d63c9d79 100644
--- a/test/routines/level3/xtrsm.hpp
+++ b/test/routines/level3/xtrsm.hpp
@@ -16,18 +16,9 @@
#ifndef CLBLAST_TEST_ROUTINES_XTRSM_H_
#define CLBLAST_TEST_ROUTINES_XTRSM_H_
-#include <vector>
-#include <string>
-
+#include "test/routines/common.hpp"
#include "test/routines/level3/xtrsm_data.hpp"
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
-
namespace clblast {
// =================================================================================================
@@ -139,6 +130,21 @@ class TestXtrsm {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+   // Invokes cublasXtrsm using the handle stored in args and the CUDA buffers. Any cuBLAS
+   // status other than CUBLAS_STATUS_SUCCESS is reported as kUnknownError; the queue is unused.
+   const auto handle = reinterpret_cast<cublasHandle_t>(args.cublas_handle);
+   const auto cublas_status = cublasXtrsm(handle, args.layout,
+                                          convertToCUBLAS(args.side),
+                                          convertToCUBLAS(args.triangle),
+                                          convertToCUBLAS(args.a_transpose),
+                                          convertToCUBLAS(args.diagonal),
+                                          args.m, args.n, args.alpha,
+                                          buffers.a_mat, args.a_offset, args.a_ld,
+                                          buffers.b_mat, args.b_offset, args.b_ld);
+   return (cublas_status == CUBLAS_STATUS_SUCCESS) ? StatusCode::kSuccess : StatusCode::kUnknownError;
+ }
+ #endif
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.b_size, static_cast<T>(0));
diff --git a/test/routines/levelx/xaxpybatched.hpp b/test/routines/levelx/xaxpybatched.hpp
index 05141bbb..5385e86e 100644
--- a/test/routines/levelx/xaxpybatched.hpp
+++ b/test/routines/levelx/xaxpybatched.hpp
@@ -16,17 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XAXPYBATCHED_H_
#define CLBLAST_TEST_ROUTINES_XAXPYBATCHED_H_
-#include <vector>
-#include <string>
-
-#include "utilities/utilities.hpp"
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -135,6 +125,19 @@ class TestXaxpyBatched {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+   // Calls cublasXaxpy once per batch entry, using that entry's alpha and x/y offsets. Stops at
+   // the first call that does not return CUBLAS_STATUS_SUCCESS and reports kUnknownError;
+   // reports kSuccess when every call succeeded. The queue argument is unused.
+   const auto handle = reinterpret_cast<cublasHandle_t>(args.cublas_handle);
+   for (size_t batch = 0; batch < args.batch_count; ++batch) {
+     const auto cublas_status = cublasXaxpy(handle, args.n, args.alphas[batch],
+                                            buffers.x_vec, args.x_offsets[batch], args.x_inc,
+                                            buffers.y_vec, args.y_offsets[batch], args.y_inc);
+     const bool succeeded = (cublas_status == CUBLAS_STATUS_SUCCESS);
+     if (!succeeded) { return StatusCode::kUnknownError; }
+   }
+   return StatusCode::kSuccess;
+ }
+ #endif
+
// Describes how to download the results of the computation
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.y_size, static_cast<T>(0));
diff --git a/test/routines/levelx/xgemmbatched.hpp b/test/routines/levelx/xgemmbatched.hpp
index ab5f20c5..ebfd8b19 100644
--- a/test/routines/levelx/xgemmbatched.hpp
+++ b/test/routines/levelx/xgemmbatched.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XGEMMBATCHED_H_
#define CLBLAST_TEST_ROUTINES_XGEMMBATCHED_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -168,6 +160,23 @@ class TestXgemmBatched {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+   // Calls cublasXgemm once per batch entry, using that entry's alpha, beta and a/b/c offsets.
+   // Stops at the first call that does not return CUBLAS_STATUS_SUCCESS and reports
+   // kUnknownError; reports kSuccess when every call succeeded. The queue argument is unused.
+   const auto handle = reinterpret_cast<cublasHandle_t>(args.cublas_handle);
+   for (size_t batch = 0; batch < args.batch_count; ++batch) {
+     const auto cublas_status = cublasXgemm(handle, args.layout,
+                                            convertToCUBLAS(args.a_transpose),
+                                            convertToCUBLAS(args.b_transpose),
+                                            args.m, args.n, args.k, args.alphas[batch],
+                                            buffers.a_mat, args.a_offsets[batch], args.a_ld,
+                                            buffers.b_mat, args.b_offsets[batch], args.b_ld, args.betas[batch],
+                                            buffers.c_mat, args.c_offsets[batch], args.c_ld);
+     const bool succeeded = (cublas_status == CUBLAS_STATUS_SUCCESS);
+     if (!succeeded) { return StatusCode::kUnknownError; }
+   }
+   return StatusCode::kSuccess;
+ }
+ #endif
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.c_size, static_cast<T>(0));
diff --git a/test/routines/levelx/xinvert.hpp b/test/routines/levelx/xinvert.hpp
index ffb484b0..cc02a88b 100644
--- a/test/routines/levelx/xinvert.hpp
+++ b/test/routines/levelx/xinvert.hpp
@@ -16,10 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XINVERT_H_
#define CLBLAST_TEST_ROUTINES_XINVERT_H_
-#include <vector>
-#include <string>
-
-#include "utilities/utilities.hpp"
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -192,6 +189,9 @@ class TestXinvert {
static StatusCode RunReference2(const Arguments<T> &args, BuffersHost<T> &buffers_host, Queue&) {
return RunReference(args, buffers_host);
}
+ static StatusCode RunReference3(const Arguments<T> &, BuffersCUDA<T> &, Queue &) {
+   return StatusCode::kUnknownError; // stub: always fails; parameters are unnamed because none is used
+ }
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
diff --git a/test/routines/levelx/xomatcopy.hpp b/test/routines/levelx/xomatcopy.hpp
index d5973b4c..bbf6006c 100644
--- a/test/routines/levelx/xomatcopy.hpp
+++ b/test/routines/levelx/xomatcopy.hpp
@@ -16,8 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XOMATCOPY_H_
#define CLBLAST_TEST_ROUTINES_XOMATCOPY_H_
-#include <vector>
-#include <string>
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -151,6 +150,9 @@ class TestXomatcopy {
static StatusCode RunReference2(const Arguments<T> &args, BuffersHost<T> &buffers_host, Queue&) {
return RunReference(args, buffers_host);
}
+ static StatusCode RunReference3(const Arguments<T> &, BuffersCUDA<T> &, Queue &) {
+   return StatusCode::kUnknownError; // stub: always fails; parameters are unnamed because none is used
+ }
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
diff --git a/test/wrapper_cblas.hpp b/test/wrapper_cblas.hpp
index dd610a6c..070d44b5 100644
--- a/test/wrapper_cblas.hpp
+++ b/test/wrapper_cblas.hpp
@@ -94,7 +94,7 @@ void cblasXrot(const size_t n,
std::vector<float>& y_buffer, const size_t y_offset, const size_t y_inc,
const float cos,
const float sin) {
- cblas_srot(n,
+ cblas_srot(static_cast<int>(n),
&x_buffer[x_offset], static_cast<int>(x_inc),
&y_buffer[y_offset], static_cast<int>(y_inc),
cos,
@@ -105,7 +105,7 @@ void cblasXrot(const size_t n,
std::vector<double>& y_buffer, const size_t y_offset, const size_t y_inc,
const double cos,
const double sin) {
- cblas_drot(n,
+ cblas_drot(static_cast<int>(n),
&x_buffer[x_offset], static_cast<int>(x_inc),
&y_buffer[y_offset], static_cast<int>(y_inc),
cos,
@@ -117,7 +117,7 @@ void cblasXrotm(const size_t n,
std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc,
std::vector<float>& y_buffer, const size_t y_offset, const size_t y_inc,
std::vector<float>& sparam_buffer, const size_t sparam_offset) {
- cblas_srotm(n,
+ cblas_srotm(static_cast<int>(n),
&x_buffer[x_offset], static_cast<int>(x_inc),
&y_buffer[y_offset], static_cast<int>(y_inc),
&sparam_buffer[sparam_offset]);
@@ -126,7 +126,7 @@ void cblasXrotm(const size_t n,
std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc,
std::vector<double>& y_buffer, const size_t y_offset, const size_t y_inc,
std::vector<double>& sparam_buffer, const size_t sparam_offset) {
- cblas_drotm(n,
+ cblas_drotm(static_cast<int>(n),
&x_buffer[x_offset], static_cast<int>(x_inc),
&y_buffer[y_offset], static_cast<int>(y_inc),
&sparam_buffer[sparam_offset]);
@@ -136,28 +136,28 @@ void cblasXrotm(const size_t n,
void cblasXswap(const size_t n,
std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc,
std::vector<float>& y_buffer, const size_t y_offset, const size_t y_inc) {
- cblas_sswap(n,
+ cblas_sswap(static_cast<int>(n),
&x_buffer[x_offset], static_cast<int>(x_inc),
&y_buffer[y_offset], static_cast<int>(y_inc));
}
void cblasXswap(const size_t n,
std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc,
std::vector<double>& y_buffer, const size_t y_offset, const size_t y_inc) {
- cblas_dswap(n,
+ cblas_dswap(static_cast<int>(n),
&x_buffer[x_offset], static_cast<int>(x_inc),
&y_buffer[y_offset], static_cast<int>(y_inc));
}
void cblasXswap(const size_t n,
std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
std::vector<float2>& y_buffer, const size_t y_offset, const size_t y_inc) {
- cblas_cswap(n,
+ cblas_cswap(static_cast<int>(n),
reinterpret_cast<float*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
reinterpret_cast<float*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
}
void cblasXswap(const size_t n,
std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
std::vector<double2>& y_buffer, const size_t y_offset, const size_t y_inc) {
- cblas_zswap(n,
+ cblas_zswap(static_cast<int>(n),
reinterpret_cast<double*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
reinterpret_cast<double*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
}
@@ -177,14 +177,14 @@ void cblasXswap(const size_t n,
void cblasXscal(const size_t n,
const float alpha,
std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc) {
- cblas_sscal(n,
+ cblas_sscal(static_cast<int>(n),
alpha,
&x_buffer[x_offset], static_cast<int>(x_inc));
}
void cblasXscal(const size_t n,
const double alpha,
std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc) {
- cblas_dscal(n,
+ cblas_dscal(static_cast<int>(n),
alpha,
&x_buffer[x_offset], static_cast<int>(x_inc));
}
@@ -192,7 +192,7 @@ void cblasXscal(const size_t n,
const float2 alpha,
std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc) {
const auto alpha_array = std::vector<float>{alpha.real(), alpha.imag()};
- cblas_cscal(n,
+ cblas_cscal(static_cast<int>(n),
alpha_array.data(),
reinterpret_cast<float*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
}
@@ -200,7 +200,7 @@ void cblasXscal(const size_t n,
const double2 alpha,
std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc) {
const auto alpha_array = std::vector<double>{alpha.real(), alpha.imag()};
- cblas_zscal(n,
+ cblas_zscal(static_cast<int>(n),
alpha_array.data(),
reinterpret_cast<double*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
}
@@ -218,28 +218,28 @@ void cblasXscal(const size_t n,
void cblasXcopy(const size_t n,
const std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc,
std::vector<float>& y_buffer, const size_t y_offset, const size_t y_inc) {
- cblas_scopy(n,
+ cblas_scopy(static_cast<int>(n),
&x_buffer[x_offset], static_cast<int>(x_inc),
&y_buffer[y_offset], static_cast<int>(y_inc));
}
void cblasXcopy(const size_t n,
const std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc,
std::vector<double>& y_buffer, const size_t y_offset, const size_t y_inc) {
- cblas_dcopy(n,
+ cblas_dcopy(static_cast<int>(n),
&x_buffer[x_offset], static_cast<int>(x_inc),
&y_buffer[y_offset], static_cast<int>(y_inc));
}
void cblasXcopy(const size_t n,
const std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
std::vector<float2>& y_buffer, const size_t y_offset, const size_t y_inc) {
- cblas_ccopy(n,
+ cblas_ccopy(static_cast<int>(n),
reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
reinterpret_cast<float*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
}
void cblasXcopy(const size_t n,
const std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
std::vector<double2>& y_buffer, const size_t y_offset, const size_t y_inc) {
- cblas_zcopy(n,
+ cblas_zcopy(static_cast<int>(n),
reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
reinterpret_cast<double*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
}
@@ -259,7 +259,7 @@ void cblasXaxpy(const size_t n,
const float alpha,
const std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc,
std::vector<float>& y_buffer, const size_t y_offset, const size_t y_inc) {
- cblas_saxpy(n,
+ cblas_saxpy(static_cast<int>(n),
alpha,
&x_buffer[x_offset], static_cast<int>(x_inc),
&y_buffer[y_offset], static_cast<int>(y_inc));
@@ -268,7 +268,7 @@ void cblasXaxpy(const size_t n,
const double alpha,
const std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc,
std::vector<double>& y_buffer, const size_t y_offset, const size_t y_inc) {
- cblas_daxpy(n,
+ cblas_daxpy(static_cast<int>(n),
alpha,
&x_buffer[x_offset], static_cast<int>(x_inc),
&y_buffer[y_offset], static_cast<int>(y_inc));
@@ -278,7 +278,7 @@ void cblasXaxpy(const size_t n,
const std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
std::vector<float2>& y_buffer, const size_t y_offset, const size_t y_inc) {
const auto alpha_array = std::vector<float>{alpha.real(), alpha.imag()};
- cblas_caxpy(n,
+ cblas_caxpy(static_cast<int>(n),
alpha_array.data(),
reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
reinterpret_cast<float*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
@@ -288,7 +288,7 @@ void cblasXaxpy(const size_t n,
const std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
std::vector<double2>& y_buffer, const size_t y_offset, const size_t y_inc) {
const auto alpha_array = std::vector<double>{alpha.real(), alpha.imag()};
- cblas_zaxpy(n,
+ cblas_zaxpy(static_cast<int>(n),
alpha_array.data(),
reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
reinterpret_cast<double*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
@@ -311,7 +311,7 @@ void cblasXdot(const size_t n,
std::vector<float>& dot_buffer, const size_t dot_offset,
const std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc,
const std::vector<float>& y_buffer, const size_t y_offset, const size_t y_inc) {
- dot_buffer[dot_offset] = cblas_sdot(n,
+ dot_buffer[dot_offset] = cblas_sdot(static_cast<int>(n),
&x_buffer[x_offset], static_cast<int>(x_inc),
&y_buffer[y_offset], static_cast<int>(y_inc));
}
@@ -319,7 +319,7 @@ void cblasXdot(const size_t n,
std::vector<double>& dot_buffer, const size_t dot_offset,
const std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc,
const std::vector<double>& y_buffer, const size_t y_offset, const size_t y_inc) {
- dot_buffer[dot_offset] = cblas_ddot(n,
+ dot_buffer[dot_offset] = cblas_ddot(static_cast<int>(n),
&x_buffer[x_offset], static_cast<int>(x_inc),
&y_buffer[y_offset], static_cast<int>(y_inc));
}
@@ -342,7 +342,7 @@ void cblasXdotu(const size_t n,
std::vector<float2>& dot_buffer, const size_t dot_offset,
const std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
const std::vector<float2>& y_buffer, const size_t y_offset, const size_t y_inc) {
- cblas_cdotu_sub(n,
+ cblas_cdotu_sub(static_cast<int>(n),
reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
reinterpret_cast<const float*>(&y_buffer[y_offset]), static_cast<int>(y_inc),
reinterpret_cast<return_pointer_float>(&dot_buffer[dot_offset]));
@@ -351,7 +351,7 @@ void cblasXdotu(const size_t n,
std::vector<double2>& dot_buffer, const size_t dot_offset,
const std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
const std::vector<double2>& y_buffer, const size_t y_offset, const size_t y_inc) {
- cblas_zdotu_sub(n,
+ cblas_zdotu_sub(static_cast<int>(n),
reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
reinterpret_cast<const double*>(&y_buffer[y_offset]), static_cast<int>(y_inc),
reinterpret_cast<return_pointer_double>(&dot_buffer[dot_offset]));
@@ -362,7 +362,7 @@ void cblasXdotc(const size_t n,
std::vector<float2>& dot_buffer, const size_t dot_offset,
const std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
const std::vector<float2>& y_buffer, const size_t y_offset, const size_t y_inc) {
- cblas_cdotc_sub(n,
+ cblas_cdotc_sub(static_cast<int>(n),
reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
reinterpret_cast<const float*>(&y_buffer[y_offset]), static_cast<int>(y_inc),
reinterpret_cast<return_pointer_float>(&dot_buffer[dot_offset]));
@@ -371,7 +371,7 @@ void cblasXdotc(const size_t n,
std::vector<double2>& dot_buffer, const size_t dot_offset,
const std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
const std::vector<double2>& y_buffer, const size_t y_offset, const size_t y_inc) {
- cblas_zdotc_sub(n,
+ cblas_zdotc_sub(static_cast<int>(n),
reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
reinterpret_cast<const double*>(&y_buffer[y_offset]), static_cast<int>(y_inc),
reinterpret_cast<return_pointer_double>(&dot_buffer[dot_offset]));
@@ -381,25 +381,25 @@ void cblasXdotc(const size_t n,
void cblasXnrm2(const size_t n,
std::vector<float>& nrm2_buffer, const size_t nrm2_offset,
const std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc) {
- nrm2_buffer[nrm2_offset] = cblas_snrm2(n,
+ nrm2_buffer[nrm2_offset] = cblas_snrm2(static_cast<int>(n),
&x_buffer[x_offset], static_cast<int>(x_inc));
}
void cblasXnrm2(const size_t n,
std::vector<double>& nrm2_buffer, const size_t nrm2_offset,
const std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc) {
- nrm2_buffer[nrm2_offset] = cblas_dnrm2(n,
+ nrm2_buffer[nrm2_offset] = cblas_dnrm2(static_cast<int>(n),
&x_buffer[x_offset], static_cast<int>(x_inc));
}
void cblasXnrm2(const size_t n,
std::vector<float2>& nrm2_buffer, const size_t nrm2_offset,
const std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc) {
- nrm2_buffer[nrm2_offset].real(cblas_scnrm2(n,
+ nrm2_buffer[nrm2_offset].real(cblas_scnrm2(static_cast<int>(n),
reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc)));
}
void cblasXnrm2(const size_t n,
std::vector<double2>& nrm2_buffer, const size_t nrm2_offset,
const std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc) {
- nrm2_buffer[nrm2_offset].real(cblas_dznrm2(n,
+ nrm2_buffer[nrm2_offset].real(cblas_dznrm2(static_cast<int>(n),
reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc)));
}
void cblasXnrm2(const size_t n,
@@ -417,25 +417,25 @@ void cblasXnrm2(const size_t n,
void cblasXasum(const size_t n,
std::vector<float>& asum_buffer, const size_t asum_offset,
const std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc) {
- asum_buffer[asum_offset] = cblas_sasum(n,
+ asum_buffer[asum_offset] = cblas_sasum(static_cast<int>(n),
&x_buffer[x_offset], static_cast<int>(x_inc));
}
void cblasXasum(const size_t n,
std::vector<double>& asum_buffer, const size_t asum_offset,
const std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc) {
- asum_buffer[asum_offset] = cblas_dasum(n,
+ asum_buffer[asum_offset] = cblas_dasum(static_cast<int>(n),
&x_buffer[x_offset], static_cast<int>(x_inc));
}
void cblasXasum(const size_t n,
std::vector<float2>& asum_buffer, const size_t asum_offset,
const std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc) {
- asum_buffer[asum_offset].real(cblas_scasum(n,
+ asum_buffer[asum_offset].real(cblas_scasum(static_cast<int>(n),
reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc)));
}
void cblasXasum(const size_t n,
std::vector<double2>& asum_buffer, const size_t asum_offset,
const std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc) {
- asum_buffer[asum_offset].real(cblas_dzasum(n,
+ asum_buffer[asum_offset].real(cblas_dzasum(static_cast<int>(n),
reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc)));
}
void cblasXasum(const size_t n,
@@ -453,25 +453,25 @@ void cblasXasum(const size_t n,
void cblasXamax(const size_t n,
std::vector<float>& imax_buffer, const size_t imax_offset,
const std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc) {
- ((int*)&imax_buffer[0])[imax_offset] = cblas_isamax(n,
+ ((int*)&imax_buffer[0])[imax_offset] = cblas_isamax(static_cast<int>(n),
&x_buffer[x_offset], static_cast<int>(x_inc));
}
void cblasXamax(const size_t n,
std::vector<double>& imax_buffer, const size_t imax_offset,
const std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc) {
- ((int*)&imax_buffer[0])[imax_offset] = cblas_idamax(n,
+ ((int*)&imax_buffer[0])[imax_offset] = cblas_idamax(static_cast<int>(n),
&x_buffer[x_offset], static_cast<int>(x_inc));
}
void cblasXamax(const size_t n,
std::vector<float2>& imax_buffer, const size_t imax_offset,
const std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc) {
- ((int*)&imax_buffer[0])[imax_offset] = cblas_icamax(n,
+ ((int*)&imax_buffer[0])[imax_offset] = cblas_icamax(static_cast<int>(n),
reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
}
void cblasXamax(const size_t n,
std::vector<double2>& imax_buffer, const size_t imax_offset,
const std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc) {
- ((int*)&imax_buffer[0])[imax_offset] = cblas_izamax(n,
+ ((int*)&imax_buffer[0])[imax_offset] = cblas_izamax(static_cast<int>(n),
reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
}
void cblasXamax(const size_t n,
@@ -498,7 +498,7 @@ void cblasXgemv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose,
const float beta,
std::vector<float>& y_buffer, const size_t y_offset, const size_t y_inc) {
cblas_sgemv(layout, a_transpose,
- m, n,
+ static_cast<int>(m), static_cast<int>(n),
alpha,
&a_buffer[a_offset], a_ld,
&x_buffer[x_offset], static_cast<int>(x_inc),
@@ -513,7 +513,7 @@ void cblasXgemv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose,
const double beta,
std::vector<double>& y_buffer, const size_t y_offset, const size_t y_inc) {
cblas_dgemv(layout, a_transpose,
- m, n,
+ static_cast<int>(m), static_cast<int>(n),
alpha,
&a_buffer[a_offset], a_ld,
&x_buffer[x_offset], static_cast<int>(x_inc),
@@ -530,7 +530,7 @@ void cblasXgemv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose,
const auto alpha_array = std::vector<float>{alpha.real(), alpha.imag()};
const auto beta_array = std::vector<float>{beta.real(), beta.imag()};
cblas_cgemv(layout, a_transpose,
- m, n,
+ static_cast<int>(m), static_cast<int>(n),
alpha_array.data(),
reinterpret_cast<const float*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
@@ -547,7 +547,7 @@ void cblasXgemv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose,
const auto alpha_array = std::vector<double>{alpha.real(), alpha.imag()};
const auto beta_array = std::vector<double>{beta.real(), beta.imag()};
cblas_zgemv(layout, a_transpose,
- m, n,
+ static_cast<int>(m), static_cast<int>(n),
alpha_array.data(),
reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
@@ -583,7 +583,7 @@ void cblasXgbmv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose,
const float beta,
std::vector<float>& y_buffer, const size_t y_offset, const size_t y_inc) {
cblas_sgbmv(layout, a_transpose,
- m, n, kl, ku,
+ static_cast<int>(m), static_cast<int>(n), static_cast<int>(kl), static_cast<int>(ku),
alpha,
&a_buffer[a_offset], a_ld,
&x_buffer[x_offset], static_cast<int>(x_inc),
@@ -598,7 +598,7 @@ void cblasXgbmv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose,
const double beta,
std::vector<double>& y_buffer, const size_t y_offset, const size_t y_inc) {
cblas_dgbmv(layout, a_transpose,
- m, n, kl, ku,
+ static_cast<int>(m), static_cast<int>(n), static_cast<int>(kl), static_cast<int>(ku),
alpha,
&a_buffer[a_offset], a_ld,
&x_buffer[x_offset], static_cast<int>(x_inc),
@@ -615,7 +615,7 @@ void cblasXgbmv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose,
const auto alpha_array = std::vector<float>{alpha.real(), alpha.imag()};
const auto beta_array = std::vector<float>{beta.real(), beta.imag()};
cblas_cgbmv(layout, a_transpose,
- m, n, kl, ku,
+ static_cast<int>(m), static_cast<int>(n), static_cast<int>(kl), static_cast<int>(ku),
alpha_array.data(),
reinterpret_cast<const float*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
@@ -632,7 +632,7 @@ void cblasXgbmv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose,
const auto alpha_array = std::vector<double>{alpha.real(), alpha.imag()};
const auto beta_array = std::vector<double>{beta.real(), beta.imag()};
cblas_zgbmv(layout, a_transpose,
- m, n, kl, ku,
+ static_cast<int>(m), static_cast<int>(n), static_cast<int>(kl), static_cast<int>(ku),
alpha_array.data(),
reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
@@ -670,7 +670,7 @@ void cblasXhemv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
const auto alpha_array = std::vector<float>{alpha.real(), alpha.imag()};
const auto beta_array = std::vector<float>{beta.real(), beta.imag()};
cblas_chemv(layout, triangle,
- n,
+ static_cast<int>(n),
alpha_array.data(),
reinterpret_cast<const float*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
@@ -687,7 +687,7 @@ void cblasXhemv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
const auto alpha_array = std::vector<double>{alpha.real(), alpha.imag()};
const auto beta_array = std::vector<double>{beta.real(), beta.imag()};
cblas_zhemv(layout, triangle,
- n,
+ static_cast<int>(n),
alpha_array.data(),
reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
@@ -706,7 +706,7 @@ void cblasXhbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
const auto alpha_array = std::vector<float>{alpha.real(), alpha.imag()};
const auto beta_array = std::vector<float>{beta.real(), beta.imag()};
cblas_chbmv(layout, triangle,
- n, k,
+ static_cast<int>(n), static_cast<int>(k),
alpha_array.data(),
reinterpret_cast<const float*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
@@ -723,7 +723,7 @@ void cblasXhbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
const auto alpha_array = std::vector<double>{alpha.real(), alpha.imag()};
const auto beta_array = std::vector<double>{beta.real(), beta.imag()};
cblas_zhbmv(layout, triangle,
- n, k,
+ static_cast<int>(n), static_cast<int>(k),
alpha_array.data(),
reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
@@ -742,7 +742,7 @@ void cblasXhpmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
const auto alpha_array = std::vector<float>{alpha.real(), alpha.imag()};
const auto beta_array = std::vector<float>{beta.real(), beta.imag()};
cblas_chpmv(layout, triangle,
- n,
+ static_cast<int>(n),
alpha_array.data(),
reinterpret_cast<const float*>(&ap_buffer[ap_offset]),
reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
@@ -759,7 +759,7 @@ void cblasXhpmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
const auto alpha_array = std::vector<double>{alpha.real(), alpha.imag()};
const auto beta_array = std::vector<double>{beta.real(), beta.imag()};
cblas_zhpmv(layout, triangle,
- n,
+ static_cast<int>(n),
alpha_array.data(),
reinterpret_cast<const double*>(&ap_buffer[ap_offset]),
reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
@@ -776,7 +776,7 @@ void cblasXsymv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
const float beta,
std::vector<float>& y_buffer, const size_t y_offset, const size_t y_inc) {
cblas_ssymv(layout, triangle,
- n,
+ static_cast<int>(n),
alpha,
&a_buffer[a_offset], a_ld,
&x_buffer[x_offset], static_cast<int>(x_inc),
@@ -791,7 +791,7 @@ void cblasXsymv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
const double beta,
std::vector<double>& y_buffer, const size_t y_offset, const size_t y_inc) {
cblas_dsymv(layout, triangle,
- n,
+ static_cast<int>(n),
alpha,
&a_buffer[a_offset], a_ld,
&x_buffer[x_offset], static_cast<int>(x_inc),
@@ -827,7 +827,7 @@ void cblasXsbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
const float beta,
std::vector<float>& y_buffer, const size_t y_offset, const size_t y_inc) {
cblas_ssbmv(layout, triangle,
- n, k,
+ static_cast<int>(n), static_cast<int>(k),
alpha,
&a_buffer[a_offset], a_ld,
&x_buffer[x_offset], static_cast<int>(x_inc),
@@ -842,7 +842,7 @@ void cblasXsbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
const double beta,
std::vector<double>& y_buffer, const size_t y_offset, const size_t y_inc) {
cblas_dsbmv(layout, triangle,
- n, k,
+ static_cast<int>(n), static_cast<int>(k),
alpha,
&a_buffer[a_offset], a_ld,
&x_buffer[x_offset], static_cast<int>(x_inc),
@@ -878,7 +878,7 @@ void cblasXspmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
const float beta,
std::vector<float>& y_buffer, const size_t y_offset, const size_t y_inc) {
cblas_sspmv(layout, triangle,
- n,
+ static_cast<int>(n),
alpha,
&ap_buffer[ap_offset],
&x_buffer[x_offset], static_cast<int>(x_inc),
@@ -893,7 +893,7 @@ void cblasXspmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
const double beta,
std::vector<double>& y_buffer, const size_t y_offset, const size_t y_inc) {
cblas_dspmv(layout, triangle,
- n,
+ static_cast<int>(n),
alpha,
&ap_buffer[ap_offset],
&x_buffer[x_offset], static_cast<int>(x_inc),
@@ -926,7 +926,7 @@ void cblasXtrmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
const std::vector<float>& a_buffer, const size_t a_offset, const size_t a_ld,
std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc) {
cblas_strmv(layout, triangle, a_transpose, diagonal,
- n,
+ static_cast<int>(n),
&a_buffer[a_offset], a_ld,
&x_buffer[x_offset], static_cast<int>(x_inc));
}
@@ -935,7 +935,7 @@ void cblasXtrmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
const std::vector<double>& a_buffer, const size_t a_offset, const size_t a_ld,
std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc) {
cblas_dtrmv(layout, triangle, a_transpose, diagonal,
- n,
+ static_cast<int>(n),
&a_buffer[a_offset], a_ld,
&x_buffer[x_offset], static_cast<int>(x_inc));
}
@@ -944,7 +944,7 @@ void cblasXtrmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
const std::vector<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc) {
cblas_ctrmv(layout, triangle, a_transpose, diagonal,
- n,
+ static_cast<int>(n),
reinterpret_cast<const float*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<float*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
}
@@ -953,7 +953,7 @@ void cblasXtrmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
const std::vector<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc) {
cblas_ztrmv(layout, triangle, a_transpose, diagonal,
- n,
+ static_cast<int>(n),
reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<double*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
}
@@ -976,7 +976,7 @@ void cblasXtbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
const std::vector<float>& a_buffer, const size_t a_offset, const size_t a_ld,
std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc) {
cblas_stbmv(layout, triangle, a_transpose, diagonal,
- n, k,
+ static_cast<int>(n), static_cast<int>(k),
&a_buffer[a_offset], a_ld,
&x_buffer[x_offset], static_cast<int>(x_inc));
}
@@ -985,7 +985,7 @@ void cblasXtbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
const std::vector<double>& a_buffer, const size_t a_offset, const size_t a_ld,
std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc) {
cblas_dtbmv(layout, triangle, a_transpose, diagonal,
- n, k,
+ static_cast<int>(n), static_cast<int>(k),
&a_buffer[a_offset], a_ld,
&x_buffer[x_offset], static_cast<int>(x_inc));
}
@@ -994,7 +994,7 @@ void cblasXtbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
const std::vector<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc) {
cblas_ctbmv(layout, triangle, a_transpose, diagonal,
- n, k,
+ static_cast<int>(n), static_cast<int>(k),
reinterpret_cast<const float*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<float*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
}
@@ -1003,7 +1003,7 @@ void cblasXtbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
const std::vector<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc) {
cblas_ztbmv(layout, triangle, a_transpose, diagonal,
- n, k,
+ static_cast<int>(n), static_cast<int>(k),
reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<double*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
}
@@ -1026,7 +1026,7 @@ void cblasXtpmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
const std::vector<float>& ap_buffer, const size_t ap_offset,
std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc) {
cblas_stpmv(layout, triangle, a_transpose, diagonal,
- n,
+ static_cast<int>(n),
&ap_buffer[ap_offset],
&x_buffer[x_offset], static_cast<int>(x_inc));
}
@@ -1035,7 +1035,7 @@ void cblasXtpmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
const std::vector<double>& ap_buffer, const size_t ap_offset,
std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc) {
cblas_dtpmv(layout, triangle, a_transpose, diagonal,
- n,
+ static_cast<int>(n),
&ap_buffer[ap_offset],
&x_buffer[x_offset], static_cast<int>(x_inc));
}
@@ -1044,7 +1044,7 @@ void cblasXtpmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
const std::vector<float2>& ap_buffer, const size_t ap_offset,
std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc) {
cblas_ctpmv(layout, triangle, a_transpose, diagonal,
- n,
+ static_cast<int>(n),
reinterpret_cast<const float*>(&ap_buffer[ap_offset]),
reinterpret_cast<float*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
}
@@ -1053,7 +1053,7 @@ void cblasXtpmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
const std::vector<double2>& ap_buffer, const size_t ap_offset,
std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc) {
cblas_ztpmv(layout, triangle, a_transpose, diagonal,
- n,
+ static_cast<int>(n),
reinterpret_cast<const double*>(&ap_buffer[ap_offset]),
reinterpret_cast<double*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
}
@@ -1076,7 +1076,7 @@ void cblasXtrsv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
const std::vector<float>& a_buffer, const size_t a_offset, const size_t a_ld,
std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc) {
cblas_strsv(layout, triangle, a_transpose, diagonal,
- n,
+ static_cast<int>(n),
&a_buffer[a_offset], a_ld,
&x_buffer[x_offset], static_cast<int>(x_inc));
}
@@ -1085,7 +1085,7 @@ void cblasXtrsv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
const std::vector<double>& a_buffer, const size_t a_offset, const size_t a_ld,
std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc) {
cblas_dtrsv(layout, triangle, a_transpose, diagonal,
- n,
+ static_cast<int>(n),
&a_buffer[a_offset], a_ld,
&x_buffer[x_offset], static_cast<int>(x_inc));
}
@@ -1094,7 +1094,7 @@ void cblasXtrsv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
const std::vector<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc) {
cblas_ctrsv(layout, triangle, a_transpose, diagonal,
- n,
+ static_cast<int>(n),
reinterpret_cast<const float*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<float*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
}
@@ -1103,7 +1103,7 @@ void cblasXtrsv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
const std::vector<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc) {
cblas_ztrsv(layout, triangle, a_transpose, diagonal,
- n,
+ static_cast<int>(n),
reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<double*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
}
@@ -1114,7 +1114,7 @@ void cblasXtbsv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
const std::vector<float>& a_buffer, const size_t a_offset, const size_t a_ld,
std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc) {
cblas_stbsv(layout, triangle, a_transpose, diagonal,
- n, k,
+ static_cast<int>(n), static_cast<int>(k),
&a_buffer[a_offset], a_ld,
&x_buffer[x_offset], static_cast<int>(x_inc));
}
@@ -1123,7 +1123,7 @@ void cblasXtbsv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
const std::vector<double>& a_buffer, const size_t a_offset, const size_t a_ld,
std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc) {
cblas_dtbsv(layout, triangle, a_transpose, diagonal,
- n, k,
+ static_cast<int>(n), static_cast<int>(k),
&a_buffer[a_offset], a_ld,
&x_buffer[x_offset], static_cast<int>(x_inc));
}
@@ -1132,7 +1132,7 @@ void cblasXtbsv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
const std::vector<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc) {
cblas_ctbsv(layout, triangle, a_transpose, diagonal,
- n, k,
+ static_cast<int>(n), static_cast<int>(k),
reinterpret_cast<const float*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<float*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
}
@@ -1141,7 +1141,7 @@ void cblasXtbsv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
const std::vector<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc) {
cblas_ztbsv(layout, triangle, a_transpose, diagonal,
- n, k,
+ static_cast<int>(n), static_cast<int>(k),
reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<double*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
}
@@ -1152,7 +1152,7 @@ void cblasXtpsv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
const std::vector<float>& ap_buffer, const size_t ap_offset,
std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc) {
cblas_stpsv(layout, triangle, a_transpose, diagonal,
- n,
+ static_cast<int>(n),
&ap_buffer[ap_offset],
&x_buffer[x_offset], static_cast<int>(x_inc));
}
@@ -1161,7 +1161,7 @@ void cblasXtpsv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
const std::vector<double>& ap_buffer, const size_t ap_offset,
std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc) {
cblas_dtpsv(layout, triangle, a_transpose, diagonal,
- n,
+ static_cast<int>(n),
&ap_buffer[ap_offset],
&x_buffer[x_offset], static_cast<int>(x_inc));
}
@@ -1170,7 +1170,7 @@ void cblasXtpsv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
const std::vector<float2>& ap_buffer, const size_t ap_offset,
std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc) {
cblas_ctpsv(layout, triangle, a_transpose, diagonal,
- n,
+ static_cast<int>(n),
reinterpret_cast<const float*>(&ap_buffer[ap_offset]),
reinterpret_cast<float*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
}
@@ -1179,7 +1179,7 @@ void cblasXtpsv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
const std::vector<double2>& ap_buffer, const size_t ap_offset,
std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc) {
cblas_ztpsv(layout, triangle, a_transpose, diagonal,
- n,
+ static_cast<int>(n),
reinterpret_cast<const double*>(&ap_buffer[ap_offset]),
reinterpret_cast<double*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
}
@@ -1192,7 +1192,7 @@ void cblasXger(const CBLAS_ORDER layout,
const std::vector<float>& y_buffer, const size_t y_offset, const size_t y_inc,
std::vector<float>& a_buffer, const size_t a_offset, const size_t a_ld) {
cblas_sger(layout,
- m, n,
+ static_cast<int>(m), static_cast<int>(n),
alpha,
&x_buffer[x_offset], static_cast<int>(x_inc),
&y_buffer[y_offset], static_cast<int>(y_inc),
@@ -1205,7 +1205,7 @@ void cblasXger(const CBLAS_ORDER layout,
const std::vector<double>& y_buffer, const size_t y_offset, const size_t y_inc,
std::vector<double>& a_buffer, const size_t a_offset, const size_t a_ld) {
cblas_dger(layout,
- m, n,
+ static_cast<int>(m), static_cast<int>(n),
alpha,
&x_buffer[x_offset], static_cast<int>(x_inc),
&y_buffer[y_offset], static_cast<int>(y_inc),
@@ -1238,7 +1238,7 @@ void cblasXgeru(const CBLAS_ORDER layout,
std::vector<float2>& a_buffer, const size_t a_offset, const size_t a_ld) {
const auto alpha_array = std::vector<float>{alpha.real(), alpha.imag()};
cblas_cgeru(layout,
- m, n,
+ static_cast<int>(m), static_cast<int>(n),
alpha_array.data(),
reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
reinterpret_cast<const float*>(&y_buffer[y_offset]), static_cast<int>(y_inc),
@@ -1252,7 +1252,7 @@ void cblasXgeru(const CBLAS_ORDER layout,
std::vector<double2>& a_buffer, const size_t a_offset, const size_t a_ld) {
const auto alpha_array = std::vector<double>{alpha.real(), alpha.imag()};
cblas_zgeru(layout,
- m, n,
+ static_cast<int>(m), static_cast<int>(n),
alpha_array.data(),
reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
reinterpret_cast<const double*>(&y_buffer[y_offset]), static_cast<int>(y_inc),
@@ -1268,7 +1268,7 @@ void cblasXgerc(const CBLAS_ORDER layout,
std::vector<float2>& a_buffer, const size_t a_offset, const size_t a_ld) {
const auto alpha_array = std::vector<float>{alpha.real(), alpha.imag()};
cblas_cgerc(layout,
- m, n,
+ static_cast<int>(m), static_cast<int>(n),
alpha_array.data(),
reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
reinterpret_cast<const float*>(&y_buffer[y_offset]), static_cast<int>(y_inc),
@@ -1282,7 +1282,7 @@ void cblasXgerc(const CBLAS_ORDER layout,
std::vector<double2>& a_buffer, const size_t a_offset, const size_t a_ld) {
const auto alpha_array = std::vector<double>{alpha.real(), alpha.imag()};
cblas_zgerc(layout,
- m, n,
+ static_cast<int>(m), static_cast<int>(n),
alpha_array.data(),
reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
reinterpret_cast<const double*>(&y_buffer[y_offset]), static_cast<int>(y_inc),
@@ -1296,7 +1296,7 @@ void cblasXher(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
const std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
std::vector<float2>& a_buffer, const size_t a_offset, const size_t a_ld) {
cblas_cher(layout, triangle,
- n,
+ static_cast<int>(n),
alpha,
reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
reinterpret_cast<float*>(&a_buffer[a_offset]), a_ld);
@@ -1307,7 +1307,7 @@ void cblasXher(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
const std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
std::vector<double2>& a_buffer, const size_t a_offset, const size_t a_ld) {
cblas_zher(layout, triangle,
- n,
+ static_cast<int>(n),
alpha,
reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
reinterpret_cast<double*>(&a_buffer[a_offset]), a_ld);
@@ -1320,7 +1320,7 @@ void cblasXhpr(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
const std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
std::vector<float2>& ap_buffer, const size_t ap_offset) {
cblas_chpr(layout, triangle,
- n,
+ static_cast<int>(n),
alpha,
reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
reinterpret_cast<float*>(&ap_buffer[ap_offset]));
@@ -1331,7 +1331,7 @@ void cblasXhpr(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
const std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
std::vector<double2>& ap_buffer, const size_t ap_offset) {
cblas_zhpr(layout, triangle,
- n,
+ static_cast<int>(n),
alpha,
reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
reinterpret_cast<double*>(&ap_buffer[ap_offset]));
@@ -1346,7 +1346,7 @@ void cblasXher2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
std::vector<float2>& a_buffer, const size_t a_offset, const size_t a_ld) {
const auto alpha_array = std::vector<float>{alpha.real(), alpha.imag()};
cblas_cher2(layout, triangle,
- n,
+ static_cast<int>(n),
alpha_array.data(),
reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
reinterpret_cast<const float*>(&y_buffer[y_offset]), static_cast<int>(y_inc),
@@ -1360,7 +1360,7 @@ void cblasXher2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
std::vector<double2>& a_buffer, const size_t a_offset, const size_t a_ld) {
const auto alpha_array = std::vector<double>{alpha.real(), alpha.imag()};
cblas_zher2(layout, triangle,
- n,
+ static_cast<int>(n),
alpha_array.data(),
reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
reinterpret_cast<const double*>(&y_buffer[y_offset]), static_cast<int>(y_inc),
@@ -1376,7 +1376,7 @@ void cblasXhpr2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
std::vector<float2>& ap_buffer, const size_t ap_offset) {
const auto alpha_array = std::vector<float>{alpha.real(), alpha.imag()};
cblas_chpr2(layout, triangle,
- n,
+ static_cast<int>(n),
alpha_array.data(),
reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
reinterpret_cast<const float*>(&y_buffer[y_offset]), static_cast<int>(y_inc),
@@ -1390,7 +1390,7 @@ void cblasXhpr2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
std::vector<double2>& ap_buffer, const size_t ap_offset) {
const auto alpha_array = std::vector<double>{alpha.real(), alpha.imag()};
cblas_zhpr2(layout, triangle,
- n,
+ static_cast<int>(n),
alpha_array.data(),
reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
reinterpret_cast<const double*>(&y_buffer[y_offset]), static_cast<int>(y_inc),
@@ -1404,7 +1404,7 @@ void cblasXsyr(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
const std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc,
std::vector<float>& a_buffer, const size_t a_offset, const size_t a_ld) {
cblas_ssyr(layout, triangle,
- n,
+ static_cast<int>(n),
alpha,
&x_buffer[x_offset], static_cast<int>(x_inc),
&a_buffer[a_offset], a_ld);
@@ -1415,7 +1415,7 @@ void cblasXsyr(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
const std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc,
std::vector<double>& a_buffer, const size_t a_offset, const size_t a_ld) {
cblas_dsyr(layout, triangle,
- n,
+ static_cast<int>(n),
alpha,
&x_buffer[x_offset], static_cast<int>(x_inc),
&a_buffer[a_offset], a_ld);
@@ -1442,7 +1442,7 @@ void cblasXspr(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
const std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc,
std::vector<float>& ap_buffer, const size_t ap_offset) {
cblas_sspr(layout, triangle,
- n,
+ static_cast<int>(n),
alpha,
&x_buffer[x_offset], static_cast<int>(x_inc),
&ap_buffer[ap_offset]);
@@ -1453,7 +1453,7 @@ void cblasXspr(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
const std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc,
std::vector<double>& ap_buffer, const size_t ap_offset) {
cblas_dspr(layout, triangle,
- n,
+ static_cast<int>(n),
alpha,
&x_buffer[x_offset], static_cast<int>(x_inc),
&ap_buffer[ap_offset]);
@@ -1481,7 +1481,7 @@ void cblasXsyr2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
const std::vector<float>& y_buffer, const size_t y_offset, const size_t y_inc,
std::vector<float>& a_buffer, const size_t a_offset, const size_t a_ld) {
cblas_ssyr2(layout, triangle,
- n,
+ static_cast<int>(n),
alpha,
&x_buffer[x_offset], static_cast<int>(x_inc),
&y_buffer[y_offset], static_cast<int>(y_inc),
@@ -1494,7 +1494,7 @@ void cblasXsyr2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
const std::vector<double>& y_buffer, const size_t y_offset, const size_t y_inc,
std::vector<double>& a_buffer, const size_t a_offset, const size_t a_ld) {
cblas_dsyr2(layout, triangle,
- n,
+ static_cast<int>(n),
alpha,
&x_buffer[x_offset], static_cast<int>(x_inc),
&y_buffer[y_offset], static_cast<int>(y_inc),
@@ -1526,7 +1526,7 @@ void cblasXspr2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
const std::vector<float>& y_buffer, const size_t y_offset, const size_t y_inc,
std::vector<float>& ap_buffer, const size_t ap_offset) {
cblas_sspr2(layout, triangle,
- n,
+ static_cast<int>(n),
alpha,
&x_buffer[x_offset], static_cast<int>(x_inc),
&y_buffer[y_offset], static_cast<int>(y_inc),
@@ -1539,7 +1539,7 @@ void cblasXspr2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
const std::vector<double>& y_buffer, const size_t y_offset, const size_t y_inc,
std::vector<double>& ap_buffer, const size_t ap_offset) {
cblas_dspr2(layout, triangle,
- n,
+ static_cast<int>(n),
alpha,
&x_buffer[x_offset], static_cast<int>(x_inc),
&y_buffer[y_offset], static_cast<int>(y_inc),
@@ -1576,7 +1576,7 @@ void cblasXgemm(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose, con
const float beta,
std::vector<float>& c_buffer, const size_t c_offset, const size_t c_ld) {
cblas_sgemm(layout, a_transpose, b_transpose,
- m, n, k,
+ static_cast<int>(m), static_cast<int>(n), static_cast<int>(k),
alpha,
&a_buffer[a_offset], a_ld,
&b_buffer[b_offset], b_ld,
@@ -1591,7 +1591,7 @@ void cblasXgemm(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose, con
const double beta,
std::vector<double>& c_buffer, const size_t c_offset, const size_t c_ld) {
cblas_dgemm(layout, a_transpose, b_transpose,
- m, n, k,
+ static_cast<int>(m), static_cast<int>(n), static_cast<int>(k),
alpha,
&a_buffer[a_offset], a_ld,
&b_buffer[b_offset], b_ld,
@@ -1608,7 +1608,7 @@ void cblasXgemm(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose, con
const auto alpha_array = std::vector<float>{alpha.real(), alpha.imag()};
const auto beta_array = std::vector<float>{beta.real(), beta.imag()};
cblas_cgemm(layout, a_transpose, b_transpose,
- m, n, k,
+ static_cast<int>(m), static_cast<int>(n), static_cast<int>(k),
alpha_array.data(),
reinterpret_cast<const float*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<const float*>(&b_buffer[b_offset]), b_ld,
@@ -1625,7 +1625,7 @@ void cblasXgemm(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose, con
const auto alpha_array = std::vector<double>{alpha.real(), alpha.imag()};
const auto beta_array = std::vector<double>{beta.real(), beta.imag()};
cblas_zgemm(layout, a_transpose, b_transpose,
- m, n, k,
+ static_cast<int>(m), static_cast<int>(n), static_cast<int>(k),
alpha_array.data(),
reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<const double*>(&b_buffer[b_offset]), b_ld,
@@ -1661,7 +1661,7 @@ void cblasXsymm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL
const float beta,
std::vector<float>& c_buffer, const size_t c_offset, const size_t c_ld) {
cblas_ssymm(layout, side, triangle,
- m, n,
+ static_cast<int>(m), static_cast<int>(n),
alpha,
&a_buffer[a_offset], a_ld,
&b_buffer[b_offset], b_ld,
@@ -1676,7 +1676,7 @@ void cblasXsymm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL
const double beta,
std::vector<double>& c_buffer, const size_t c_offset, const size_t c_ld) {
cblas_dsymm(layout, side, triangle,
- m, n,
+ static_cast<int>(m), static_cast<int>(n),
alpha,
&a_buffer[a_offset], a_ld,
&b_buffer[b_offset], b_ld,
@@ -1693,7 +1693,7 @@ void cblasXsymm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL
const auto alpha_array = std::vector<float>{alpha.real(), alpha.imag()};
const auto beta_array = std::vector<float>{beta.real(), beta.imag()};
cblas_csymm(layout, side, triangle,
- m, n,
+ static_cast<int>(m), static_cast<int>(n),
alpha_array.data(),
reinterpret_cast<const float*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<const float*>(&b_buffer[b_offset]), b_ld,
@@ -1710,7 +1710,7 @@ void cblasXsymm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL
const auto alpha_array = std::vector<double>{alpha.real(), alpha.imag()};
const auto beta_array = std::vector<double>{beta.real(), beta.imag()};
cblas_zsymm(layout, side, triangle,
- m, n,
+ static_cast<int>(m), static_cast<int>(n),
alpha_array.data(),
reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<const double*>(&b_buffer[b_offset]), b_ld,
@@ -1748,7 +1748,7 @@ void cblasXhemm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL
const auto alpha_array = std::vector<float>{alpha.real(), alpha.imag()};
const auto beta_array = std::vector<float>{beta.real(), beta.imag()};
cblas_chemm(layout, side, triangle,
- m, n,
+ static_cast<int>(m), static_cast<int>(n),
alpha_array.data(),
reinterpret_cast<const float*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<const float*>(&b_buffer[b_offset]), b_ld,
@@ -1765,7 +1765,7 @@ void cblasXhemm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL
const auto alpha_array = std::vector<double>{alpha.real(), alpha.imag()};
const auto beta_array = std::vector<double>{beta.real(), beta.imag()};
cblas_zhemm(layout, side, triangle,
- m, n,
+ static_cast<int>(m), static_cast<int>(n),
alpha_array.data(),
reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<const double*>(&b_buffer[b_offset]), b_ld,
@@ -1781,7 +1781,7 @@ void cblasXsyrk(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
const float beta,
std::vector<float>& c_buffer, const size_t c_offset, const size_t c_ld) {
cblas_ssyrk(layout, triangle, a_transpose,
- n, k,
+ static_cast<int>(n), static_cast<int>(k),
alpha,
&a_buffer[a_offset], a_ld,
beta,
@@ -1794,7 +1794,7 @@ void cblasXsyrk(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
const double beta,
std::vector<double>& c_buffer, const size_t c_offset, const size_t c_ld) {
cblas_dsyrk(layout, triangle, a_transpose,
- n, k,
+ static_cast<int>(n), static_cast<int>(k),
alpha,
&a_buffer[a_offset], a_ld,
beta,
@@ -1809,7 +1809,7 @@ void cblasXsyrk(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
const auto alpha_array = std::vector<float>{alpha.real(), alpha.imag()};
const auto beta_array = std::vector<float>{beta.real(), beta.imag()};
cblas_csyrk(layout, triangle, a_transpose,
- n, k,
+ static_cast<int>(n), static_cast<int>(k),
alpha_array.data(),
reinterpret_cast<const float*>(&a_buffer[a_offset]), a_ld,
beta_array.data(),
@@ -1824,7 +1824,7 @@ void cblasXsyrk(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
const auto alpha_array = std::vector<double>{alpha.real(), alpha.imag()};
const auto beta_array = std::vector<double>{beta.real(), beta.imag()};
cblas_zsyrk(layout, triangle, a_transpose,
- n, k,
+ static_cast<int>(n), static_cast<int>(k),
alpha_array.data(),
reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld,
beta_array.data(),
@@ -1855,7 +1855,7 @@ void cblasXherk(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
const float beta,
std::vector<float2>& c_buffer, const size_t c_offset, const size_t c_ld) {
cblas_cherk(layout, triangle, a_transpose,
- n, k,
+ static_cast<int>(n), static_cast<int>(k),
alpha,
reinterpret_cast<const float*>(&a_buffer[a_offset]), a_ld,
beta,
@@ -1868,7 +1868,7 @@ void cblasXherk(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
const double beta,
std::vector<double2>& c_buffer, const size_t c_offset, const size_t c_ld) {
cblas_zherk(layout, triangle, a_transpose,
- n, k,
+ static_cast<int>(n), static_cast<int>(k),
alpha,
reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld,
beta,
@@ -1884,7 +1884,7 @@ void cblasXsyr2k(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLA
const float beta,
std::vector<float>& c_buffer, const size_t c_offset, const size_t c_ld) {
cblas_ssyr2k(layout, triangle, ab_transpose,
- n, k,
+ static_cast<int>(n), static_cast<int>(k),
alpha,
&a_buffer[a_offset], a_ld,
&b_buffer[b_offset], b_ld,
@@ -1899,7 +1899,7 @@ void cblasXsyr2k(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLA
const double beta,
std::vector<double>& c_buffer, const size_t c_offset, const size_t c_ld) {
cblas_dsyr2k(layout, triangle, ab_transpose,
- n, k,
+ static_cast<int>(n), static_cast<int>(k),
alpha,
&a_buffer[a_offset], a_ld,
&b_buffer[b_offset], b_ld,
@@ -1916,7 +1916,7 @@ void cblasXsyr2k(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLA
const auto alpha_array = std::vector<float>{alpha.real(), alpha.imag()};
const auto beta_array = std::vector<float>{beta.real(), beta.imag()};
cblas_csyr2k(layout, triangle, ab_transpose,
- n, k,
+ static_cast<int>(n), static_cast<int>(k),
alpha_array.data(),
reinterpret_cast<const float*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<const float*>(&b_buffer[b_offset]), b_ld,
@@ -1933,7 +1933,7 @@ void cblasXsyr2k(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLA
const auto alpha_array = std::vector<double>{alpha.real(), alpha.imag()};
const auto beta_array = std::vector<double>{beta.real(), beta.imag()};
cblas_zsyr2k(layout, triangle, ab_transpose,
- n, k,
+ static_cast<int>(n), static_cast<int>(k),
alpha_array.data(),
reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<const double*>(&b_buffer[b_offset]), b_ld,
@@ -1970,7 +1970,7 @@ void cblasXher2k(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLA
std::vector<float2>& c_buffer, const size_t c_offset, const size_t c_ld) {
const auto alpha_array = std::vector<float>{alpha.real(), alpha.imag()};
cblas_cher2k(layout, triangle, ab_transpose,
- n, k,
+ static_cast<int>(n), static_cast<int>(k),
alpha_array.data(),
reinterpret_cast<const float*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<const float*>(&b_buffer[b_offset]), b_ld,
@@ -1986,7 +1986,7 @@ void cblasXher2k(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLA
std::vector<double2>& c_buffer, const size_t c_offset, const size_t c_ld) {
const auto alpha_array = std::vector<double>{alpha.real(), alpha.imag()};
cblas_zher2k(layout, triangle, ab_transpose,
- n, k,
+ static_cast<int>(n), static_cast<int>(k),
alpha_array.data(),
reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<const double*>(&b_buffer[b_offset]), b_ld,
@@ -2001,7 +2001,7 @@ void cblasXtrmm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL
const std::vector<float>& a_buffer, const size_t a_offset, const size_t a_ld,
std::vector<float>& b_buffer, const size_t b_offset, const size_t b_ld) {
cblas_strmm(layout, side, triangle, a_transpose, diagonal,
- m, n,
+ static_cast<int>(m), static_cast<int>(n),
alpha,
&a_buffer[a_offset], a_ld,
&b_buffer[b_offset], b_ld);
@@ -2012,7 +2012,7 @@ void cblasXtrmm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL
const std::vector<double>& a_buffer, const size_t a_offset, const size_t a_ld,
std::vector<double>& b_buffer, const size_t b_offset, const size_t b_ld) {
cblas_dtrmm(layout, side, triangle, a_transpose, diagonal,
- m, n,
+ static_cast<int>(m), static_cast<int>(n),
alpha,
&a_buffer[a_offset], a_ld,
&b_buffer[b_offset], b_ld);
@@ -2024,7 +2024,7 @@ void cblasXtrmm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL
std::vector<float2>& b_buffer, const size_t b_offset, const size_t b_ld) {
const auto alpha_array = std::vector<float>{alpha.real(), alpha.imag()};
cblas_ctrmm(layout, side, triangle, a_transpose, diagonal,
- m, n,
+ static_cast<int>(m), static_cast<int>(n),
alpha_array.data(),
reinterpret_cast<const float*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<float*>(&b_buffer[b_offset]), b_ld);
@@ -2036,7 +2036,7 @@ void cblasXtrmm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL
std::vector<double2>& b_buffer, const size_t b_offset, const size_t b_ld) {
const auto alpha_array = std::vector<double>{alpha.real(), alpha.imag()};
cblas_ztrmm(layout, side, triangle, a_transpose, diagonal,
- m, n,
+ static_cast<int>(m), static_cast<int>(n),
alpha_array.data(),
reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<double*>(&b_buffer[b_offset]), b_ld);
@@ -2063,7 +2063,7 @@ void cblasXtrsm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL
const std::vector<float>& a_buffer, const size_t a_offset, const size_t a_ld,
std::vector<float>& b_buffer, const size_t b_offset, const size_t b_ld) {
cblas_strsm(layout, side, triangle, a_transpose, diagonal,
- m, n,
+ static_cast<int>(m), static_cast<int>(n),
alpha,
&a_buffer[a_offset], a_ld,
&b_buffer[b_offset], b_ld);
@@ -2074,7 +2074,7 @@ void cblasXtrsm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL
const std::vector<double>& a_buffer, const size_t a_offset, const size_t a_ld,
std::vector<double>& b_buffer, const size_t b_offset, const size_t b_ld) {
cblas_dtrsm(layout, side, triangle, a_transpose, diagonal,
- m, n,
+ static_cast<int>(m), static_cast<int>(n),
alpha,
&a_buffer[a_offset], a_ld,
&b_buffer[b_offset], b_ld);
@@ -2086,7 +2086,7 @@ void cblasXtrsm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL
std::vector<float2>& b_buffer, const size_t b_offset, const size_t b_ld) {
const auto alpha_array = std::vector<float>{alpha.real(), alpha.imag()};
cblas_ctrsm(layout, side, triangle, a_transpose, diagonal,
- m, n,
+ static_cast<int>(m), static_cast<int>(n),
alpha_array.data(),
reinterpret_cast<const float*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<float*>(&b_buffer[b_offset]), b_ld);
@@ -2098,7 +2098,7 @@ void cblasXtrsm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL
std::vector<double2>& b_buffer, const size_t b_offset, const size_t b_ld) {
const auto alpha_array = std::vector<double>{alpha.real(), alpha.imag()};
cblas_ztrsm(layout, side, triangle, a_transpose, diagonal,
- m, n,
+ static_cast<int>(m), static_cast<int>(n),
alpha_array.data(),
reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<double*>(&b_buffer[b_offset]), b_ld);
diff --git a/test/wrapper_cublas.hpp b/test/wrapper_cublas.hpp
new file mode 100644
index 00000000..35b1b9c6
--- /dev/null
+++ b/test/wrapper_cublas.hpp
@@ -0,0 +1,2548 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements a wrapper around the cuBLAS library, such that its routines can be called
+// in a similar way as the CLBlast routines: using alpha and beta to determine the precision.
+//
+// =================================================================================================
+
+#ifndef CLBLAST_TEST_WRAPPER_CUBLAS_H_
+#define CLBLAST_TEST_WRAPPER_CUBLAS_H_
+
+#include <cuda_runtime.h>
+#include <cublas_v2.h>
+
+#include "utilities/utilities.hpp"
+
+namespace clblast {
+
+// Mapping of CLBlast enumeration values onto their cuBLAS counterparts.
+// Transpose: kNo -> CUBLAS_OP_N, kYes -> CUBLAS_OP_T, any other value -> CUBLAS_OP_C.
+cublasOperation_t convertToCUBLAS(const Transpose v) {
+  if (v == Transpose::kNo) { return CUBLAS_OP_N; }
+  if (v == Transpose::kYes) { return CUBLAS_OP_T; }
+  return CUBLAS_OP_C;
+}
+// Triangle: kUpper -> CUBLAS_FILL_MODE_UPPER, any other value -> CUBLAS_FILL_MODE_LOWER.
+cublasFillMode_t convertToCUBLAS(const Triangle v) {
+  if (v == Triangle::kUpper) { return CUBLAS_FILL_MODE_UPPER; }
+  return CUBLAS_FILL_MODE_LOWER;
+}
+// Diagonal: kUnit -> CUBLAS_DIAG_UNIT, any other value -> CUBLAS_DIAG_NON_UNIT.
+cublasDiagType_t convertToCUBLAS(const Diagonal v) {
+  if (v == Diagonal::kUnit) { return CUBLAS_DIAG_UNIT; }
+  return CUBLAS_DIAG_NON_UNIT;
+}
+// Side: kLeft -> CUBLAS_SIDE_LEFT, any other value -> CUBLAS_SIDE_RIGHT.
+cublasSideMode_t convertToCUBLAS(const Side v) {
+  if (v == Side::kLeft) { return CUBLAS_SIDE_LEFT; }
+  return CUBLAS_SIDE_RIGHT;
+}
+
+// =================================================================================================
+// BLAS level-1 (vector-vector) routines
+// =================================================================================================
+
+// SROTG/DROTG: hands the four arguments to cublasSrotg/cublasDrotg, each as the address of the
+// element at the given offset inside its buffer. cudaDeviceSynchronize() is called afterwards
+// (its own return value is discarded) and the cuBLAS status is returned.
+template <typename T>
+cublasStatus_t cublasXrotg(cublasHandle_t handle, T* sa_buffer, const size_t sa_offset,
+                           T* sb_buffer, const size_t sb_offset,
+                           T* sc_buffer, const size_t sc_offset,
+                           T* ss_buffer, const size_t ss_offset);
+template <>
+cublasStatus_t cublasXrotg<float>(cublasHandle_t handle, float* sa_buffer, const size_t sa_offset,
+                                  float* sb_buffer, const size_t sb_offset,
+                                  float* sc_buffer, const size_t sc_offset,
+                                  float* ss_buffer, const size_t ss_offset) {
+  float* const a_ptr = sa_buffer + sa_offset;
+  float* const b_ptr = sb_buffer + sb_offset;
+  float* const c_ptr = sc_buffer + sc_offset;
+  float* const s_ptr = ss_buffer + ss_offset;
+  const cublasStatus_t result = cublasSrotg(handle, a_ptr, b_ptr, c_ptr, s_ptr);
+  cudaDeviceSynchronize();
+  return result;
+}
+template <>
+cublasStatus_t cublasXrotg<double>(cublasHandle_t handle, double* sa_buffer, const size_t sa_offset,
+                                   double* sb_buffer, const size_t sb_offset,
+                                   double* sc_buffer, const size_t sc_offset,
+                                   double* ss_buffer, const size_t ss_offset) {
+  double* const a_ptr = sa_buffer + sa_offset;
+  double* const b_ptr = sb_buffer + sb_offset;
+  double* const c_ptr = sc_buffer + sc_offset;
+  double* const s_ptr = ss_buffer + ss_offset;
+  const cublasStatus_t result = cublasDrotg(handle, a_ptr, b_ptr, c_ptr, s_ptr);
+  cudaDeviceSynchronize();
+  return result;
+}
+
+// SROTMG/DROTMG: hands the five arguments to cublasSrotmg/cublasDrotmg, each as the address of
+// the element at the given offset inside its buffer (sy1 is read-only). cudaDeviceSynchronize()
+// is called afterwards and the cuBLAS status is returned.
+template <typename T>
+cublasStatus_t cublasXrotmg(cublasHandle_t handle, T* sd1_buffer, const size_t sd1_offset,
+                            T* sd2_buffer, const size_t sd2_offset,
+                            T* sx1_buffer, const size_t sx1_offset,
+                            const T* sy1_buffer, const size_t sy1_offset,
+                            T* sparam_buffer, const size_t sparam_offset);
+template <>
+cublasStatus_t cublasXrotmg<float>(cublasHandle_t handle, float* sd1_buffer, const size_t sd1_offset,
+                                   float* sd2_buffer, const size_t sd2_offset,
+                                   float* sx1_buffer, const size_t sx1_offset,
+                                   const float* sy1_buffer, const size_t sy1_offset,
+                                   float* sparam_buffer, const size_t sparam_offset) {
+  float* const d1_ptr = sd1_buffer + sd1_offset;
+  float* const d2_ptr = sd2_buffer + sd2_offset;
+  float* const x1_ptr = sx1_buffer + sx1_offset;
+  const float* const y1_ptr = sy1_buffer + sy1_offset;
+  float* const param_ptr = sparam_buffer + sparam_offset;
+  const cublasStatus_t result = cublasSrotmg(handle, d1_ptr, d2_ptr, x1_ptr, y1_ptr, param_ptr);
+  cudaDeviceSynchronize();
+  return result;
+}
+template <>
+cublasStatus_t cublasXrotmg<double>(cublasHandle_t handle, double* sd1_buffer, const size_t sd1_offset,
+                                    double* sd2_buffer, const size_t sd2_offset,
+                                    double* sx1_buffer, const size_t sx1_offset,
+                                    const double* sy1_buffer, const size_t sy1_offset,
+                                    double* sparam_buffer, const size_t sparam_offset) {
+  double* const d1_ptr = sd1_buffer + sd1_offset;
+  double* const d2_ptr = sd2_buffer + sd2_offset;
+  double* const x1_ptr = sx1_buffer + sx1_offset;
+  const double* const y1_ptr = sy1_buffer + sy1_offset;
+  double* const param_ptr = sparam_buffer + sparam_offset;
+  const cublasStatus_t result = cublasDrotmg(handle, d1_ptr, d2_ptr, x1_ptr, y1_ptr, param_ptr);
+  cudaDeviceSynchronize();
+  return result;
+}
+
+// SROT/DROT: overloads selected by the element type of the buffers. The length and increments
+// are narrowed from size_t to int, the buffers are advanced by their offsets, and the cos/sin
+// scalars are passed to cuBLAS by address. cudaDeviceSynchronize() is called before the cuBLAS
+// status is returned.
+cublasStatus_t cublasXrot(cublasHandle_t handle, const size_t n,
+                          float* x_buffer, const size_t x_offset, const size_t x_inc,
+                          float* y_buffer, const size_t y_offset, const size_t y_inc,
+                          const float cos,
+                          const float sin) {
+  const auto count = static_cast<int>(n);
+  const auto x_stride = static_cast<int>(x_inc);
+  const auto y_stride = static_cast<int>(y_inc);
+  const cublasStatus_t result = cublasSrot(handle, count,
+                                           x_buffer + x_offset, x_stride,
+                                           y_buffer + y_offset, y_stride,
+                                           &cos, &sin);
+  cudaDeviceSynchronize();
+  return result;
+}
+cublasStatus_t cublasXrot(cublasHandle_t handle, const size_t n,
+                          double* x_buffer, const size_t x_offset, const size_t x_inc,
+                          double* y_buffer, const size_t y_offset, const size_t y_inc,
+                          const double cos,
+                          const double sin) {
+  const auto count = static_cast<int>(n);
+  const auto x_stride = static_cast<int>(x_inc);
+  const auto y_stride = static_cast<int>(y_inc);
+  const cublasStatus_t result = cublasDrot(handle, count,
+                                           x_buffer + x_offset, x_stride,
+                                           y_buffer + y_offset, y_stride,
+                                           &cos, &sin);
+  cudaDeviceSynchronize();
+  return result;
+}
+
+// SROTM/DROTM: forwards to cublasSrotm/cublasDrotm. The length and increments are narrowed from
+// size_t to int and all three buffers are advanced by their offsets. cudaDeviceSynchronize() is
+// called before the cuBLAS status is returned.
+template <typename T>
+cublasStatus_t cublasXrotm(cublasHandle_t handle, const size_t n,
+                           T* x_buffer, const size_t x_offset, const size_t x_inc,
+                           T* y_buffer, const size_t y_offset, const size_t y_inc,
+                           T* sparam_buffer, const size_t sparam_offset);
+template <>
+cublasStatus_t cublasXrotm<float>(cublasHandle_t handle, const size_t n,
+                                  float* x_buffer, const size_t x_offset, const size_t x_inc,
+                                  float* y_buffer, const size_t y_offset, const size_t y_inc,
+                                  float* sparam_buffer, const size_t sparam_offset) {
+  const auto count = static_cast<int>(n);
+  const auto x_stride = static_cast<int>(x_inc);
+  const auto y_stride = static_cast<int>(y_inc);
+  const cublasStatus_t result = cublasSrotm(handle, count,
+                                            x_buffer + x_offset, x_stride,
+                                            y_buffer + y_offset, y_stride,
+                                            sparam_buffer + sparam_offset);
+  cudaDeviceSynchronize();
+  return result;
+}
+template <>
+cublasStatus_t cublasXrotm<double>(cublasHandle_t handle, const size_t n,
+                                   double* x_buffer, const size_t x_offset, const size_t x_inc,
+                                   double* y_buffer, const size_t y_offset, const size_t y_inc,
+                                   double* sparam_buffer, const size_t sparam_offset) {
+  const auto count = static_cast<int>(n);
+  const auto x_stride = static_cast<int>(x_inc);
+  const auto y_stride = static_cast<int>(y_inc);
+  const cublasStatus_t result = cublasDrotm(handle, count,
+                                            x_buffer + x_offset, x_stride,
+                                            y_buffer + y_offset, y_stride,
+                                            sparam_buffer + sparam_offset);
+  cudaDeviceSynchronize();
+  return result;
+}
+
+// SSWAP/DSWAP/CSWAP/ZSWAP: forwards to the matching cuBLAS swap routine. The length and
+// increments are narrowed from size_t to int and both buffers are advanced by their offsets; the
+// complex specializations reinterpret the pointers as cuComplex/cuDoubleComplex.
+// cudaDeviceSynchronize() is called before the cuBLAS status is returned. The half
+// specialization makes no cuBLAS call and reports CUBLAS_STATUS_NOT_SUPPORTED.
+template <typename T>
+cublasStatus_t cublasXswap(cublasHandle_t handle, const size_t n,
+                           T* x_buffer, const size_t x_offset, const size_t x_inc,
+                           T* y_buffer, const size_t y_offset, const size_t y_inc);
+template <>
+cublasStatus_t cublasXswap<float>(cublasHandle_t handle, const size_t n,
+                                  float* x_buffer, const size_t x_offset, const size_t x_inc,
+                                  float* y_buffer, const size_t y_offset, const size_t y_inc) {
+  const auto count = static_cast<int>(n);
+  const auto x_stride = static_cast<int>(x_inc);
+  const auto y_stride = static_cast<int>(y_inc);
+  const cublasStatus_t result = cublasSswap(handle, count,
+                                            x_buffer + x_offset, x_stride,
+                                            y_buffer + y_offset, y_stride);
+  cudaDeviceSynchronize();
+  return result;
+}
+template <>
+cublasStatus_t cublasXswap<double>(cublasHandle_t handle, const size_t n,
+                                   double* x_buffer, const size_t x_offset, const size_t x_inc,
+                                   double* y_buffer, const size_t y_offset, const size_t y_inc) {
+  const auto count = static_cast<int>(n);
+  const auto x_stride = static_cast<int>(x_inc);
+  const auto y_stride = static_cast<int>(y_inc);
+  const cublasStatus_t result = cublasDswap(handle, count,
+                                            x_buffer + x_offset, x_stride,
+                                            y_buffer + y_offset, y_stride);
+  cudaDeviceSynchronize();
+  return result;
+}
+template <>
+cublasStatus_t cublasXswap<float2>(cublasHandle_t handle, const size_t n,
+                                   float2* x_buffer, const size_t x_offset, const size_t x_inc,
+                                   float2* y_buffer, const size_t y_offset, const size_t y_inc) {
+  auto* const x_ptr = reinterpret_cast<cuComplex*>(x_buffer + x_offset);
+  auto* const y_ptr = reinterpret_cast<cuComplex*>(y_buffer + y_offset);
+  const cublasStatus_t result = cublasCswap(handle, static_cast<int>(n),
+                                            x_ptr, static_cast<int>(x_inc),
+                                            y_ptr, static_cast<int>(y_inc));
+  cudaDeviceSynchronize();
+  return result;
+}
+template <>
+cublasStatus_t cublasXswap<double2>(cublasHandle_t handle, const size_t n,
+                                    double2* x_buffer, const size_t x_offset, const size_t x_inc,
+                                    double2* y_buffer, const size_t y_offset, const size_t y_inc) {
+  auto* const x_ptr = reinterpret_cast<cuDoubleComplex*>(x_buffer + x_offset);
+  auto* const y_ptr = reinterpret_cast<cuDoubleComplex*>(y_buffer + y_offset);
+  const cublasStatus_t result = cublasZswap(handle, static_cast<int>(n),
+                                            x_ptr, static_cast<int>(x_inc),
+                                            y_ptr, static_cast<int>(y_inc));
+  cudaDeviceSynchronize();
+  return result;
+}
+template <>
+cublasStatus_t cublasXswap<half>(cublasHandle_t handle, const size_t n,
+                                 half* x_buffer, const size_t x_offset, const size_t x_inc,
+                                 half* y_buffer, const size_t y_offset, const size_t y_inc) {
+  // No cuBLAS call is made for half precision; all arguments are ignored
+  return CUBLAS_STATUS_NOT_SUPPORTED;
+}
+
+// SSCAL/DSCAL/CSCAL/ZSCAL: overloads selected by the type of alpha. The length and increment are
+// narrowed from size_t to int and the buffer is advanced by its offset; alpha is passed to
+// cuBLAS by address, after being repacked into a cuComplex/cuDoubleComplex for the complex
+// overloads. cudaDeviceSynchronize() is called before the cuBLAS status is returned. The half
+// overload makes no cuBLAS call and reports CUBLAS_STATUS_NOT_SUPPORTED.
+cublasStatus_t cublasXscal(cublasHandle_t handle, const size_t n,
+                           const float alpha,
+                           float* x_buffer, const size_t x_offset, const size_t x_inc) {
+  const auto count = static_cast<int>(n);
+  const auto x_stride = static_cast<int>(x_inc);
+  const cublasStatus_t result = cublasSscal(handle, count, &alpha,
+                                            x_buffer + x_offset, x_stride);
+  cudaDeviceSynchronize();
+  return result;
+}
+cublasStatus_t cublasXscal(cublasHandle_t handle, const size_t n,
+                           const double alpha,
+                           double* x_buffer, const size_t x_offset, const size_t x_inc) {
+  const auto count = static_cast<int>(n);
+  const auto x_stride = static_cast<int>(x_inc);
+  const cublasStatus_t result = cublasDscal(handle, count, &alpha,
+                                            x_buffer + x_offset, x_stride);
+  cudaDeviceSynchronize();
+  return result;
+}
+cublasStatus_t cublasXscal(cublasHandle_t handle, const size_t n,
+                           const float2 alpha,
+                           float2* x_buffer, const size_t x_offset, const size_t x_inc) {
+  const cuComplex cu_alpha = make_cuComplex(alpha.real(), alpha.imag());
+  auto* const x_ptr = reinterpret_cast<cuComplex*>(x_buffer + x_offset);
+  const cublasStatus_t result = cublasCscal(handle, static_cast<int>(n), &cu_alpha,
+                                            x_ptr, static_cast<int>(x_inc));
+  cudaDeviceSynchronize();
+  return result;
+}
+cublasStatus_t cublasXscal(cublasHandle_t handle, const size_t n,
+                           const double2 alpha,
+                           double2* x_buffer, const size_t x_offset, const size_t x_inc) {
+  const cuDoubleComplex cu_alpha = make_cuDoubleComplex(alpha.real(), alpha.imag());
+  auto* const x_ptr = reinterpret_cast<cuDoubleComplex*>(x_buffer + x_offset);
+  const cublasStatus_t result = cublasZscal(handle, static_cast<int>(n), &cu_alpha,
+                                            x_ptr, static_cast<int>(x_inc));
+  cudaDeviceSynchronize();
+  return result;
+}
+cublasStatus_t cublasXscal(cublasHandle_t handle, const size_t n,
+                           const half alpha,
+                           half* x_buffer, const size_t x_offset, const size_t x_inc) {
+  // No cuBLAS call is made for half precision; all arguments are ignored
+  return CUBLAS_STATUS_NOT_SUPPORTED;
+}
+
+// SCOPY/DCOPY/CCOPY/ZCOPY: forwards to the matching cuBLAS copy routine with x as the read-only
+// input and y as the output. The length and increments are narrowed from size_t to int and both
+// buffers are advanced by their offsets; the complex specializations reinterpret the pointers
+// as cuComplex/cuDoubleComplex. cudaDeviceSynchronize() is called before the cuBLAS status is
+// returned. The half specialization makes no cuBLAS call and reports
+// CUBLAS_STATUS_NOT_SUPPORTED.
+template <typename T>
+cublasStatus_t cublasXcopy(cublasHandle_t handle, const size_t n,
+                           const T* x_buffer, const size_t x_offset, const size_t x_inc,
+                           T* y_buffer, const size_t y_offset, const size_t y_inc);
+template <>
+cublasStatus_t cublasXcopy<float>(cublasHandle_t handle, const size_t n,
+                                  const float* x_buffer, const size_t x_offset, const size_t x_inc,
+                                  float* y_buffer, const size_t y_offset, const size_t y_inc) {
+  const auto count = static_cast<int>(n);
+  const auto x_stride = static_cast<int>(x_inc);
+  const auto y_stride = static_cast<int>(y_inc);
+  const cublasStatus_t result = cublasScopy(handle, count,
+                                            x_buffer + x_offset, x_stride,
+                                            y_buffer + y_offset, y_stride);
+  cudaDeviceSynchronize();
+  return result;
+}
+template <>
+cublasStatus_t cublasXcopy<double>(cublasHandle_t handle, const size_t n,
+                                   const double* x_buffer, const size_t x_offset, const size_t x_inc,
+                                   double* y_buffer, const size_t y_offset, const size_t y_inc) {
+  const auto count = static_cast<int>(n);
+  const auto x_stride = static_cast<int>(x_inc);
+  const auto y_stride = static_cast<int>(y_inc);
+  const cublasStatus_t result = cublasDcopy(handle, count,
+                                            x_buffer + x_offset, x_stride,
+                                            y_buffer + y_offset, y_stride);
+  cudaDeviceSynchronize();
+  return result;
+}
+template <>
+cublasStatus_t cublasXcopy<float2>(cublasHandle_t handle, const size_t n,
+                                   const float2* x_buffer, const size_t x_offset, const size_t x_inc,
+                                   float2* y_buffer, const size_t y_offset, const size_t y_inc) {
+  const auto* const x_ptr = reinterpret_cast<const cuComplex*>(x_buffer + x_offset);
+  auto* const y_ptr = reinterpret_cast<cuComplex*>(y_buffer + y_offset);
+  const cublasStatus_t result = cublasCcopy(handle, static_cast<int>(n),
+                                            x_ptr, static_cast<int>(x_inc),
+                                            y_ptr, static_cast<int>(y_inc));
+  cudaDeviceSynchronize();
+  return result;
+}
+template <>
+cublasStatus_t cublasXcopy<double2>(cublasHandle_t handle, const size_t n,
+                                    const double2* x_buffer, const size_t x_offset, const size_t x_inc,
+                                    double2* y_buffer, const size_t y_offset, const size_t y_inc) {
+  const auto* const x_ptr = reinterpret_cast<const cuDoubleComplex*>(x_buffer + x_offset);
+  auto* const y_ptr = reinterpret_cast<cuDoubleComplex*>(y_buffer + y_offset);
+  const cublasStatus_t result = cublasZcopy(handle, static_cast<int>(n),
+                                            x_ptr, static_cast<int>(x_inc),
+                                            y_ptr, static_cast<int>(y_inc));
+  cudaDeviceSynchronize();
+  return result;
+}
+template <>
+cublasStatus_t cublasXcopy<half>(cublasHandle_t handle, const size_t n,
+                                 const half* x_buffer, const size_t x_offset, const size_t x_inc,
+                                 half* y_buffer, const size_t y_offset, const size_t y_inc) {
+  // No cuBLAS call is made for half precision; all arguments are ignored
+  return CUBLAS_STATUS_NOT_SUPPORTED;
+}
+
+// SAXPY/DAXPY/CAXPY/ZAXPY: overloads selected by the type of alpha, with x as the read-only
+// input and y as the in/out buffer. The length and increments are narrowed from size_t to int
+// and both buffers are advanced by their offsets; alpha is passed to cuBLAS by address, after
+// being repacked into a cuComplex/cuDoubleComplex for the complex overloads.
+// cudaDeviceSynchronize() is called before the cuBLAS status is returned. The half overload
+// makes no cuBLAS call and reports CUBLAS_STATUS_NOT_SUPPORTED.
+cublasStatus_t cublasXaxpy(cublasHandle_t handle, const size_t n,
+                           const float alpha,
+                           const float* x_buffer, const size_t x_offset, const size_t x_inc,
+                           float* y_buffer, const size_t y_offset, const size_t y_inc) {
+  const auto count = static_cast<int>(n);
+  const auto x_stride = static_cast<int>(x_inc);
+  const auto y_stride = static_cast<int>(y_inc);
+  const cublasStatus_t result = cublasSaxpy(handle, count, &alpha,
+                                            x_buffer + x_offset, x_stride,
+                                            y_buffer + y_offset, y_stride);
+  cudaDeviceSynchronize();
+  return result;
+}
+cublasStatus_t cublasXaxpy(cublasHandle_t handle, const size_t n,
+                           const double alpha,
+                           const double* x_buffer, const size_t x_offset, const size_t x_inc,
+                           double* y_buffer, const size_t y_offset, const size_t y_inc) {
+  const auto count = static_cast<int>(n);
+  const auto x_stride = static_cast<int>(x_inc);
+  const auto y_stride = static_cast<int>(y_inc);
+  const cublasStatus_t result = cublasDaxpy(handle, count, &alpha,
+                                            x_buffer + x_offset, x_stride,
+                                            y_buffer + y_offset, y_stride);
+  cudaDeviceSynchronize();
+  return result;
+}
+cublasStatus_t cublasXaxpy(cublasHandle_t handle, const size_t n,
+                           const float2 alpha,
+                           const float2* x_buffer, const size_t x_offset, const size_t x_inc,
+                           float2* y_buffer, const size_t y_offset, const size_t y_inc) {
+  const cuComplex cu_alpha = make_cuComplex(alpha.real(), alpha.imag());
+  const auto* const x_ptr = reinterpret_cast<const cuComplex*>(x_buffer + x_offset);
+  auto* const y_ptr = reinterpret_cast<cuComplex*>(y_buffer + y_offset);
+  const cublasStatus_t result = cublasCaxpy(handle, static_cast<int>(n), &cu_alpha,
+                                            x_ptr, static_cast<int>(x_inc),
+                                            y_ptr, static_cast<int>(y_inc));
+  cudaDeviceSynchronize();
+  return result;
+}
+cublasStatus_t cublasXaxpy(cublasHandle_t handle, const size_t n,
+                           const double2 alpha,
+                           const double2* x_buffer, const size_t x_offset, const size_t x_inc,
+                           double2* y_buffer, const size_t y_offset, const size_t y_inc) {
+  const cuDoubleComplex cu_alpha = make_cuDoubleComplex(alpha.real(), alpha.imag());
+  const auto* const x_ptr = reinterpret_cast<const cuDoubleComplex*>(x_buffer + x_offset);
+  auto* const y_ptr = reinterpret_cast<cuDoubleComplex*>(y_buffer + y_offset);
+  const cublasStatus_t result = cublasZaxpy(handle, static_cast<int>(n), &cu_alpha,
+                                            x_ptr, static_cast<int>(x_inc),
+                                            y_ptr, static_cast<int>(y_inc));
+  cudaDeviceSynchronize();
+  return result;
+}
+cublasStatus_t cublasXaxpy(cublasHandle_t handle, const size_t n,
+                           const half alpha,
+                           const half* x_buffer, const size_t x_offset, const size_t x_inc,
+                           half* y_buffer, const size_t y_offset, const size_t y_inc) {
+  // No cuBLAS call is made for half precision; all arguments are ignored
+  return CUBLAS_STATUS_NOT_SUPPORTED;
+}
+
+// SDOT/DDOT: forwards to cublasSdot/cublasDdot. Note the argument order differs from this
+// wrapper's signature: cuBLAS takes x and y first and the result pointer last. The length and
+// increments are narrowed from size_t to int and all buffers are advanced by their offsets.
+// cudaDeviceSynchronize() is called before the cuBLAS status is returned. The half
+// specialization makes no cuBLAS call and reports CUBLAS_STATUS_NOT_SUPPORTED.
+template <typename T>
+cublasStatus_t cublasXdot(cublasHandle_t handle, const size_t n,
+                          T* dot_buffer, const size_t dot_offset,
+                          const T* x_buffer, const size_t x_offset, const size_t x_inc,
+                          const T* y_buffer, const size_t y_offset, const size_t y_inc);
+template <>
+cublasStatus_t cublasXdot<float>(cublasHandle_t handle, const size_t n,
+                                 float* dot_buffer, const size_t dot_offset,
+                                 const float* x_buffer, const size_t x_offset, const size_t x_inc,
+                                 const float* y_buffer, const size_t y_offset, const size_t y_inc) {
+  const auto count = static_cast<int>(n);
+  const auto x_stride = static_cast<int>(x_inc);
+  const auto y_stride = static_cast<int>(y_inc);
+  const cublasStatus_t result = cublasSdot(handle, count,
+                                           x_buffer + x_offset, x_stride,
+                                           y_buffer + y_offset, y_stride,
+                                           dot_buffer + dot_offset);
+  cudaDeviceSynchronize();
+  return result;
+}
+template <>
+cublasStatus_t cublasXdot<double>(cublasHandle_t handle, const size_t n,
+                                  double* dot_buffer, const size_t dot_offset,
+                                  const double* x_buffer, const size_t x_offset, const size_t x_inc,
+                                  const double* y_buffer, const size_t y_offset, const size_t y_inc) {
+  const auto count = static_cast<int>(n);
+  const auto x_stride = static_cast<int>(x_inc);
+  const auto y_stride = static_cast<int>(y_inc);
+  const cublasStatus_t result = cublasDdot(handle, count,
+                                           x_buffer + x_offset, x_stride,
+                                           y_buffer + y_offset, y_stride,
+                                           dot_buffer + dot_offset);
+  cudaDeviceSynchronize();
+  return result;
+}
+template <>
+cublasStatus_t cublasXdot<half>(cublasHandle_t handle, const size_t n,
+                                half* dot_buffer, const size_t dot_offset,
+                                const half* x_buffer, const size_t x_offset, const size_t x_inc,
+                                const half* y_buffer, const size_t y_offset, const size_t y_inc) {
+  // No cuBLAS call is made for half precision; all arguments are ignored
+  return CUBLAS_STATUS_NOT_SUPPORTED;
+}
+
+// CDOTU/ZDOTU: forwards to cublasCdotu/cublasZdotu. All three buffers are advanced by their
+// offsets and reinterpreted as cuComplex/cuDoubleComplex pointers; the length and increments are
+// narrowed from size_t to int. cuBLAS takes x and y first and the result pointer last.
+// cudaDeviceSynchronize() is called before the cuBLAS status is returned.
+template <typename T>
+cublasStatus_t cublasXdotu(cublasHandle_t handle, const size_t n,
+                           T* dot_buffer, const size_t dot_offset,
+                           const T* x_buffer, const size_t x_offset, const size_t x_inc,
+                           const T* y_buffer, const size_t y_offset, const size_t y_inc);
+template <>
+cublasStatus_t cublasXdotu<float2>(cublasHandle_t handle, const size_t n,
+                                   float2* dot_buffer, const size_t dot_offset,
+                                   const float2* x_buffer, const size_t x_offset, const size_t x_inc,
+                                   const float2* y_buffer, const size_t y_offset, const size_t y_inc) {
+  const auto* const x_ptr = reinterpret_cast<const cuComplex*>(x_buffer + x_offset);
+  const auto* const y_ptr = reinterpret_cast<const cuComplex*>(y_buffer + y_offset);
+  auto* const dot_ptr = reinterpret_cast<cuComplex*>(dot_buffer + dot_offset);
+  const cublasStatus_t result = cublasCdotu(handle, static_cast<int>(n),
+                                            x_ptr, static_cast<int>(x_inc),
+                                            y_ptr, static_cast<int>(y_inc),
+                                            dot_ptr);
+  cudaDeviceSynchronize();
+  return result;
+}
+template <>
+cublasStatus_t cublasXdotu<double2>(cublasHandle_t handle, const size_t n,
+                                    double2* dot_buffer, const size_t dot_offset,
+                                    const double2* x_buffer, const size_t x_offset, const size_t x_inc,
+                                    const double2* y_buffer, const size_t y_offset, const size_t y_inc) {
+  const auto* const x_ptr = reinterpret_cast<const cuDoubleComplex*>(x_buffer + x_offset);
+  const auto* const y_ptr = reinterpret_cast<const cuDoubleComplex*>(y_buffer + y_offset);
+  auto* const dot_ptr = reinterpret_cast<cuDoubleComplex*>(dot_buffer + dot_offset);
+  const cublasStatus_t result = cublasZdotu(handle, static_cast<int>(n),
+                                            x_ptr, static_cast<int>(x_inc),
+                                            y_ptr, static_cast<int>(y_inc),
+                                            dot_ptr);
+  cudaDeviceSynchronize();
+  return result;
+}
+
+// CDOTC/ZDOTC: forwards to cublasCdotc/cublasZdotc. All three buffers are advanced by their
+// offsets and reinterpreted as cuComplex/cuDoubleComplex pointers; the length and increments are
+// narrowed from size_t to int. cuBLAS takes x and y first and the result pointer last.
+// cudaDeviceSynchronize() is called before the cuBLAS status is returned.
+template <typename T>
+cublasStatus_t cublasXdotc(cublasHandle_t handle, const size_t n,
+                           T* dot_buffer, const size_t dot_offset,
+                           const T* x_buffer, const size_t x_offset, const size_t x_inc,
+                           const T* y_buffer, const size_t y_offset, const size_t y_inc);
+template <>
+cublasStatus_t cublasXdotc<float2>(cublasHandle_t handle, const size_t n,
+                                   float2* dot_buffer, const size_t dot_offset,
+                                   const float2* x_buffer, const size_t x_offset, const size_t x_inc,
+                                   const float2* y_buffer, const size_t y_offset, const size_t y_inc) {
+  const auto* const x_ptr = reinterpret_cast<const cuComplex*>(x_buffer + x_offset);
+  const auto* const y_ptr = reinterpret_cast<const cuComplex*>(y_buffer + y_offset);
+  auto* const dot_ptr = reinterpret_cast<cuComplex*>(dot_buffer + dot_offset);
+  const cublasStatus_t result = cublasCdotc(handle, static_cast<int>(n),
+                                            x_ptr, static_cast<int>(x_inc),
+                                            y_ptr, static_cast<int>(y_inc),
+                                            dot_ptr);
+  cudaDeviceSynchronize();
+  return result;
+}
+template <>
+cublasStatus_t cublasXdotc<double2>(cublasHandle_t handle, const size_t n,
+                                    double2* dot_buffer, const size_t dot_offset,
+                                    const double2* x_buffer, const size_t x_offset, const size_t x_inc,
+                                    const double2* y_buffer, const size_t y_offset, const size_t y_inc) {
+  const auto* const x_ptr = reinterpret_cast<const cuDoubleComplex*>(x_buffer + x_offset);
+  const auto* const y_ptr = reinterpret_cast<const cuDoubleComplex*>(y_buffer + y_offset);
+  auto* const dot_ptr = reinterpret_cast<cuDoubleComplex*>(dot_buffer + dot_offset);
+  const cublasStatus_t result = cublasZdotc(handle, static_cast<int>(n),
+                                            x_ptr, static_cast<int>(x_inc),
+                                            y_ptr, static_cast<int>(y_inc),
+                                            dot_ptr);
+  cudaDeviceSynchronize();
+  return result;
+}
+
+// SNRM2/DNRM2/ScNRM2/DzNRM2: forwards to the matching cuBLAS norm routine, with the input vector
+// first and the result pointer last. The length and increment are narrowed from size_t to int
+// and both buffers are advanced by their offsets. In the complex specializations the result
+// slot is handed to cuBLAS as a float*/double* aliasing the complex output element.
+// cudaDeviceSynchronize() is called before the cuBLAS status is returned. The half
+// specialization makes no cuBLAS call and reports CUBLAS_STATUS_NOT_SUPPORTED.
+template <typename T>
+cublasStatus_t cublasXnrm2(cublasHandle_t handle, const size_t n,
+                           T* nrm2_buffer, const size_t nrm2_offset,
+                           const T* x_buffer, const size_t x_offset, const size_t x_inc);
+template <>
+cublasStatus_t cublasXnrm2<float>(cublasHandle_t handle, const size_t n,
+                                  float* nrm2_buffer, const size_t nrm2_offset,
+                                  const float* x_buffer, const size_t x_offset, const size_t x_inc) {
+  const auto count = static_cast<int>(n);
+  const auto x_stride = static_cast<int>(x_inc);
+  const cublasStatus_t result = cublasSnrm2(handle, count,
+                                            x_buffer + x_offset, x_stride,
+                                            nrm2_buffer + nrm2_offset);
+  cudaDeviceSynchronize();
+  return result;
+}
+template <>
+cublasStatus_t cublasXnrm2<double>(cublasHandle_t handle, const size_t n,
+                                   double* nrm2_buffer, const size_t nrm2_offset,
+                                   const double* x_buffer, const size_t x_offset, const size_t x_inc) {
+  const auto count = static_cast<int>(n);
+  const auto x_stride = static_cast<int>(x_inc);
+  const cublasStatus_t result = cublasDnrm2(handle, count,
+                                            x_buffer + x_offset, x_stride,
+                                            nrm2_buffer + nrm2_offset);
+  cudaDeviceSynchronize();
+  return result;
+}
+template <>
+cublasStatus_t cublasXnrm2<float2>(cublasHandle_t handle, const size_t n,
+                                   float2* nrm2_buffer, const size_t nrm2_offset,
+                                   const float2* x_buffer, const size_t x_offset, const size_t x_inc) {
+  const auto* const x_ptr = reinterpret_cast<const cuComplex*>(x_buffer + x_offset);
+  auto* const nrm2_ptr = reinterpret_cast<float*>(nrm2_buffer + nrm2_offset);
+  const cublasStatus_t result = cublasScnrm2(handle, static_cast<int>(n),
+                                             x_ptr, static_cast<int>(x_inc),
+                                             nrm2_ptr);
+  cudaDeviceSynchronize();
+  return result;
+}
+template <>
+cublasStatus_t cublasXnrm2<double2>(cublasHandle_t handle, const size_t n,
+                                    double2* nrm2_buffer, const size_t nrm2_offset,
+                                    const double2* x_buffer, const size_t x_offset, const size_t x_inc) {
+  const auto* const x_ptr = reinterpret_cast<const cuDoubleComplex*>(x_buffer + x_offset);
+  auto* const nrm2_ptr = reinterpret_cast<double*>(nrm2_buffer + nrm2_offset);
+  const cublasStatus_t result = cublasDznrm2(handle, static_cast<int>(n),
+                                             x_ptr, static_cast<int>(x_inc),
+                                             nrm2_ptr);
+  cudaDeviceSynchronize();
+  return result;
+}
+template <>
+cublasStatus_t cublasXnrm2<half>(cublasHandle_t handle, const size_t n,
+                                 half* nrm2_buffer, const size_t nrm2_offset,
+                                 const half* x_buffer, const size_t x_offset, const size_t x_inc) {
+  // No cuBLAS call is made for half precision; all arguments are ignored
+  return CUBLAS_STATUS_NOT_SUPPORTED;
+}
+
+// SASUM/DASUM/ScASUM/DzASUM: forwards to the matching cuBLAS absolute-sum routine, with the
+// input vector first and the result pointer last. The length and increment are narrowed from
+// size_t to int and both buffers are advanced by their offsets. In the complex specializations
+// the result slot is handed to cuBLAS as a float*/double* aliasing the complex output element.
+// cudaDeviceSynchronize() is called before the cuBLAS status is returned. The half
+// specialization makes no cuBLAS call and reports CUBLAS_STATUS_NOT_SUPPORTED.
+template <typename T>
+cublasStatus_t cublasXasum(cublasHandle_t handle, const size_t n,
+                           T* asum_buffer, const size_t asum_offset,
+                           const T* x_buffer, const size_t x_offset, const size_t x_inc);
+template <>
+cublasStatus_t cublasXasum<float>(cublasHandle_t handle, const size_t n,
+                                  float* asum_buffer, const size_t asum_offset,
+                                  const float* x_buffer, const size_t x_offset, const size_t x_inc) {
+  const auto count = static_cast<int>(n);
+  const auto x_stride = static_cast<int>(x_inc);
+  const cublasStatus_t result = cublasSasum(handle, count,
+                                            x_buffer + x_offset, x_stride,
+                                            asum_buffer + asum_offset);
+  cudaDeviceSynchronize();
+  return result;
+}
+template <>
+cublasStatus_t cublasXasum<double>(cublasHandle_t handle, const size_t n,
+                                   double* asum_buffer, const size_t asum_offset,
+                                   const double* x_buffer, const size_t x_offset, const size_t x_inc) {
+  const auto count = static_cast<int>(n);
+  const auto x_stride = static_cast<int>(x_inc);
+  const cublasStatus_t result = cublasDasum(handle, count,
+                                            x_buffer + x_offset, x_stride,
+                                            asum_buffer + asum_offset);
+  cudaDeviceSynchronize();
+  return result;
+}
+template <>
+cublasStatus_t cublasXasum<float2>(cublasHandle_t handle, const size_t n,
+                                   float2* asum_buffer, const size_t asum_offset,
+                                   const float2* x_buffer, const size_t x_offset, const size_t x_inc) {
+  const auto* const x_ptr = reinterpret_cast<const cuComplex*>(x_buffer + x_offset);
+  auto* const asum_ptr = reinterpret_cast<float*>(asum_buffer + asum_offset);
+  const cublasStatus_t result = cublasScasum(handle, static_cast<int>(n),
+                                             x_ptr, static_cast<int>(x_inc),
+                                             asum_ptr);
+  cudaDeviceSynchronize();
+  return result;
+}
+template <>
+cublasStatus_t cublasXasum<double2>(cublasHandle_t handle, const size_t n,
+                                    double2* asum_buffer, const size_t asum_offset,
+                                    const double2* x_buffer, const size_t x_offset, const size_t x_inc) {
+  const auto* const x_ptr = reinterpret_cast<const cuDoubleComplex*>(x_buffer + x_offset);
+  auto* const asum_ptr = reinterpret_cast<double*>(asum_buffer + asum_offset);
+  const cublasStatus_t result = cublasDzasum(handle, static_cast<int>(n),
+                                             x_ptr, static_cast<int>(x_inc),
+                                             asum_ptr);
+  cudaDeviceSynchronize();
+  return result;
+}
+template <>
+cublasStatus_t cublasXasum<half>(cublasHandle_t handle, const size_t n,
+                                 half* asum_buffer, const size_t asum_offset,
+                                 const half* x_buffer, const size_t x_offset, const size_t x_inc) {
+  // No cuBLAS call is made for half precision; all arguments are ignored
+  return CUBLAS_STATUS_NOT_SUPPORTED;
+}
+
+// iSAMAX/iDAMAX/iCAMAX/iZAMAX/iHAMAX: forwards to the matching cuBLAS index-of-max routine, with
+// the input vector first and the result pointer last. The length and increment are narrowed
+// from size_t to int and both buffers are advanced by their offsets. The result slot, although
+// typed as T in this wrapper, is handed to cuBLAS as an int* aliasing the output element.
+// cudaDeviceSynchronize() is called before the cuBLAS status is returned. The half
+// specialization makes no cuBLAS call and reports CUBLAS_STATUS_NOT_SUPPORTED.
+template <typename T>
+cublasStatus_t cublasXamax(cublasHandle_t handle, const size_t n,
+                           T* imax_buffer, const size_t imax_offset,
+                           const T* x_buffer, const size_t x_offset, const size_t x_inc);
+template <>
+cublasStatus_t cublasXamax<float>(cublasHandle_t handle, const size_t n,
+                                  float* imax_buffer, const size_t imax_offset,
+                                  const float* x_buffer, const size_t x_offset, const size_t x_inc) {
+  auto* const imax_ptr = reinterpret_cast<int*>(imax_buffer + imax_offset);
+  const cublasStatus_t result = cublasIsamax(handle, static_cast<int>(n),
+                                             x_buffer + x_offset, static_cast<int>(x_inc),
+                                             imax_ptr);
+  cudaDeviceSynchronize();
+  return result;
+}
+template <>
+cublasStatus_t cublasXamax<double>(cublasHandle_t handle, const size_t n,
+                                   double* imax_buffer, const size_t imax_offset,
+                                   const double* x_buffer, const size_t x_offset, const size_t x_inc) {
+  auto* const imax_ptr = reinterpret_cast<int*>(imax_buffer + imax_offset);
+  const cublasStatus_t result = cublasIdamax(handle, static_cast<int>(n),
+                                             x_buffer + x_offset, static_cast<int>(x_inc),
+                                             imax_ptr);
+  cudaDeviceSynchronize();
+  return result;
+}
+template <>
+cublasStatus_t cublasXamax<float2>(cublasHandle_t handle, const size_t n,
+                                   float2* imax_buffer, const size_t imax_offset,
+                                   const float2* x_buffer, const size_t x_offset, const size_t x_inc) {
+  const auto* const x_ptr = reinterpret_cast<const cuComplex*>(x_buffer + x_offset);
+  auto* const imax_ptr = reinterpret_cast<int*>(imax_buffer + imax_offset);
+  const cublasStatus_t result = cublasIcamax(handle, static_cast<int>(n),
+                                             x_ptr, static_cast<int>(x_inc),
+                                             imax_ptr);
+  cudaDeviceSynchronize();
+  return result;
+}
+template <>
+cublasStatus_t cublasXamax<double2>(cublasHandle_t handle, const size_t n,
+                                    double2* imax_buffer, const size_t imax_offset,
+                                    const double2* x_buffer, const size_t x_offset, const size_t x_inc) {
+  const auto* const x_ptr = reinterpret_cast<const cuDoubleComplex*>(x_buffer + x_offset);
+  auto* const imax_ptr = reinterpret_cast<int*>(imax_buffer + imax_offset);
+  const cublasStatus_t result = cublasIzamax(handle, static_cast<int>(n),
+                                             x_ptr, static_cast<int>(x_inc),
+                                             imax_ptr);
+  cudaDeviceSynchronize();
+  return result;
+}
+template <>
+cublasStatus_t cublasXamax<half>(cublasHandle_t handle, const size_t n,
+                                 half* imax_buffer, const size_t imax_offset,
+                                 const half* x_buffer, const size_t x_offset, const size_t x_inc) {
+  // No cuBLAS call is made for half precision; all arguments are ignored
+  return CUBLAS_STATUS_NOT_SUPPORTED;
+}
+
+// =================================================================================================
+// BLAS level-2 (matrix-vector) routines
+// =================================================================================================
+
+// Forwards the cuBLAS calls for SGEMV/DGEMV/CGEMV/ZGEMV
+// Overloads are selected by the type of alpha/beta. A row-major layout is rejected up front with
+// CUBLAS_STATUS_NOT_SUPPORTED, before any cuBLAS call is made. Otherwise all size_t sizes,
+// increments and the leading dimension are explicitly narrowed to int, the buffers are indexed
+// by their offsets, and alpha/beta are passed by address (repacked into cuComplex/cuDoubleComplex
+// for the complex overloads). cudaDeviceSynchronize() is called before the cuBLAS status is
+// returned. The half overload makes no cuBLAS call and always reports
+// CUBLAS_STATUS_NOT_SUPPORTED.
+cublasStatus_t cublasXgemv(cublasHandle_t handle, const Layout layout, const cublasOperation_t a_transpose,
+                           const size_t m, const size_t n,
+                           const float alpha,
+                           const float* a_buffer, const size_t a_offset, const size_t a_ld,
+                           const float* x_buffer, const size_t x_offset, const size_t x_inc,
+                           const float beta,
+                           float* y_buffer, const size_t y_offset, const size_t y_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  auto status = cublasSgemv(handle, a_transpose,
+                            static_cast<int>(m), static_cast<int>(n),
+                            &alpha,
+                            &a_buffer[a_offset], static_cast<int>(a_ld),  // explicit, like the other sizes
+                            &x_buffer[x_offset], static_cast<int>(x_inc),
+                            &beta,
+                            &y_buffer[y_offset], static_cast<int>(y_inc));
+  cudaDeviceSynchronize();
+  return status;
+}
+cublasStatus_t cublasXgemv(cublasHandle_t handle, const Layout layout, const cublasOperation_t a_transpose,
+                           const size_t m, const size_t n,
+                           const double alpha,
+                           const double* a_buffer, const size_t a_offset, const size_t a_ld,
+                           const double* x_buffer, const size_t x_offset, const size_t x_inc,
+                           const double beta,
+                           double* y_buffer, const size_t y_offset, const size_t y_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  auto status = cublasDgemv(handle, a_transpose,
+                            static_cast<int>(m), static_cast<int>(n),
+                            &alpha,
+                            &a_buffer[a_offset], static_cast<int>(a_ld),  // explicit, like the other sizes
+                            &x_buffer[x_offset], static_cast<int>(x_inc),
+                            &beta,
+                            &y_buffer[y_offset], static_cast<int>(y_inc));
+  cudaDeviceSynchronize();
+  return status;
+}
+cublasStatus_t cublasXgemv(cublasHandle_t handle, const Layout layout, const cublasOperation_t a_transpose,
+                           const size_t m, const size_t n,
+                           const float2 alpha,
+                           const float2* a_buffer, const size_t a_offset, const size_t a_ld,
+                           const float2* x_buffer, const size_t x_offset, const size_t x_inc,
+                           const float2 beta,
+                           float2* y_buffer, const size_t y_offset, const size_t y_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  // Repack the scalars into the cuBLAS complex type so they can be passed by address
+  cuComplex alpha_cuda;
+  alpha_cuda.x = alpha.real();
+  alpha_cuda.y = alpha.imag();
+  cuComplex beta_cuda;
+  beta_cuda.x = beta.real();
+  beta_cuda.y = beta.imag();
+  auto status = cublasCgemv(handle, a_transpose,
+                            static_cast<int>(m), static_cast<int>(n),
+                            &alpha_cuda,
+                            reinterpret_cast<const cuComplex*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+                            reinterpret_cast<const cuComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+                            &beta_cuda,
+                            reinterpret_cast<cuComplex*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
+  cudaDeviceSynchronize();
+  return status;
+}
+cublasStatus_t cublasXgemv(cublasHandle_t handle, const Layout layout, const cublasOperation_t a_transpose,
+                           const size_t m, const size_t n,
+                           const double2 alpha,
+                           const double2* a_buffer, const size_t a_offset, const size_t a_ld,
+                           const double2* x_buffer, const size_t x_offset, const size_t x_inc,
+                           const double2 beta,
+                           double2* y_buffer, const size_t y_offset, const size_t y_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  // Repack the scalars into the cuBLAS complex type so they can be passed by address
+  cuDoubleComplex alpha_cuda;
+  alpha_cuda.x = alpha.real();
+  alpha_cuda.y = alpha.imag();
+  cuDoubleComplex beta_cuda;
+  beta_cuda.x = beta.real();
+  beta_cuda.y = beta.imag();
+  auto status = cublasZgemv(handle, a_transpose,
+                            static_cast<int>(m), static_cast<int>(n),
+                            &alpha_cuda,
+                            reinterpret_cast<const cuDoubleComplex*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+                            reinterpret_cast<const cuDoubleComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+                            &beta_cuda,
+                            reinterpret_cast<cuDoubleComplex*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
+  cudaDeviceSynchronize();
+  return status;
+}
+cublasStatus_t cublasXgemv(cublasHandle_t handle, const Layout layout, const cublasOperation_t a_transpose,
+                           const size_t m, const size_t n,
+                           const half alpha,
+                           const half* a_buffer, const size_t a_offset, const size_t a_ld,
+                           const half* x_buffer, const size_t x_offset, const size_t x_inc,
+                           const half beta,
+                           half* y_buffer, const size_t y_offset, const size_t y_inc) {
+  // No cuBLAS call is made for half precision; all arguments are ignored
+  return CUBLAS_STATUS_NOT_SUPPORTED;
+}
+
+// Forwards the cuBLAS calls for SGBMV/DGBMV/CGBMV/ZGBMV
+cublasStatus_t cublasXgbmv(cublasHandle_t handle, const Layout layout, const cublasOperation_t a_transpose,
+ const size_t m, const size_t n, const size_t kl, const size_t ku,
+ const float alpha,
+ const float* a_buffer, const size_t a_offset, const size_t a_ld,
+ const float* x_buffer, const size_t x_offset, const size_t x_inc,
+ const float beta,
+ float* y_buffer, const size_t y_offset, const size_t y_inc) {
+ if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+ auto status = cublasSgbmv(handle, a_transpose,
+ static_cast<int>(m), static_cast<int>(n), static_cast<int>(kl), static_cast<int>(ku),
+ &alpha,
+ &a_buffer[a_offset], a_ld,
+ &x_buffer[x_offset], static_cast<int>(x_inc),
+ &beta,
+ &y_buffer[y_offset], static_cast<int>(y_inc));
+ cudaDeviceSynchronize();
+ return status;
+}
+// DGBMV: forwards to cublasDgbmv. A row-major layout is rejected up front with
+// CUBLAS_STATUS_NOT_SUPPORTED. Offsets are applied to the buffer pointers, and every size_t
+// quantity (including a_ld) is narrowed to int with an explicit static_cast. Returns the status
+// of cublasDgbmv; cudaDeviceSynchronize() is called before returning and its result is not checked.
+cublasStatus_t cublasXgbmv(cublasHandle_t handle, const Layout layout, const cublasOperation_t a_transpose,
+    const size_t m, const size_t n, const size_t kl, const size_t ku,
+    const double alpha,
+    const double* a_buffer, const size_t a_offset, const size_t a_ld,
+    const double* x_buffer, const size_t x_offset, const size_t x_inc,
+    const double beta,
+    double* y_buffer, const size_t y_offset, const size_t y_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  const auto status = cublasDgbmv(handle, a_transpose,
+      static_cast<int>(m), static_cast<int>(n), static_cast<int>(kl), static_cast<int>(ku),
+      &alpha,
+      &a_buffer[a_offset], static_cast<int>(a_ld),
+      &x_buffer[x_offset], static_cast<int>(x_inc),
+      &beta,
+      &y_buffer[y_offset], static_cast<int>(y_inc));
+  cudaDeviceSynchronize();
+  return status;
+}
+// CGBMV: forwards to cublasCgbmv. A row-major layout is rejected up front with
+// CUBLAS_STATUS_NOT_SUPPORTED. The complex scalars are copied component-wise into cuComplex
+// values, the offset buffer pointers are reinterpreted as cuComplex pointers, and every size_t
+// quantity (including a_ld) is narrowed to int with an explicit static_cast. Returns the status
+// of cublasCgbmv; cudaDeviceSynchronize() is called before returning and its result is not checked.
+cublasStatus_t cublasXgbmv(cublasHandle_t handle, const Layout layout, const cublasOperation_t a_transpose,
+    const size_t m, const size_t n, const size_t kl, const size_t ku,
+    const float2 alpha,
+    const float2* a_buffer, const size_t a_offset, const size_t a_ld,
+    const float2* x_buffer, const size_t x_offset, const size_t x_inc,
+    const float2 beta,
+    float2* y_buffer, const size_t y_offset, const size_t y_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  cuComplex alpha_cuda;
+  alpha_cuda.x = alpha.real();
+  alpha_cuda.y = alpha.imag();
+  cuComplex beta_cuda;
+  beta_cuda.x = beta.real();
+  beta_cuda.y = beta.imag();
+  const auto status = cublasCgbmv(handle, a_transpose,
+      static_cast<int>(m), static_cast<int>(n), static_cast<int>(kl), static_cast<int>(ku),
+      &alpha_cuda,
+      reinterpret_cast<const cuComplex*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+      reinterpret_cast<const cuComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+      &beta_cuda,
+      reinterpret_cast<cuComplex*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
+  cudaDeviceSynchronize();
+  return status;
+}
+// ZGBMV: forwards to cublasZgbmv. A row-major layout is rejected up front with
+// CUBLAS_STATUS_NOT_SUPPORTED. The complex scalars are copied component-wise into cuDoubleComplex
+// values, the offset buffer pointers are reinterpreted as cuDoubleComplex pointers, and every
+// size_t quantity (including a_ld) is narrowed to int with an explicit static_cast. Returns the
+// status of cublasZgbmv; cudaDeviceSynchronize() is called before returning, result unchecked.
+cublasStatus_t cublasXgbmv(cublasHandle_t handle, const Layout layout, const cublasOperation_t a_transpose,
+    const size_t m, const size_t n, const size_t kl, const size_t ku,
+    const double2 alpha,
+    const double2* a_buffer, const size_t a_offset, const size_t a_ld,
+    const double2* x_buffer, const size_t x_offset, const size_t x_inc,
+    const double2 beta,
+    double2* y_buffer, const size_t y_offset, const size_t y_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  cuDoubleComplex alpha_cuda;
+  alpha_cuda.x = alpha.real();
+  alpha_cuda.y = alpha.imag();
+  cuDoubleComplex beta_cuda;
+  beta_cuda.x = beta.real();
+  beta_cuda.y = beta.imag();
+  const auto status = cublasZgbmv(handle, a_transpose,
+      static_cast<int>(m), static_cast<int>(n), static_cast<int>(kl), static_cast<int>(ku),
+      &alpha_cuda,
+      reinterpret_cast<const cuDoubleComplex*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+      reinterpret_cast<const cuDoubleComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+      &beta_cuda,
+      reinterpret_cast<cuDoubleComplex*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
+  cudaDeviceSynchronize();
+  return status;
+}
+// Half-precision GBMV placeholder: no cuBLAS routine is invoked and none of the arguments are
+// read; the call always reports CUBLAS_STATUS_NOT_SUPPORTED.
+cublasStatus_t cublasXgbmv(cublasHandle_t handle, const Layout layout, const cublasOperation_t a_transpose,
+    const size_t m, const size_t n, const size_t kl, const size_t ku,
+    const half alpha,
+    const half* a_buffer, const size_t a_offset, const size_t a_ld,
+    const half* x_buffer, const size_t x_offset, const size_t x_inc,
+    const half beta,
+    half* y_buffer, const size_t y_offset, const size_t y_inc) {
+  return CUBLAS_STATUS_NOT_SUPPORTED;
+}
+
+// Forwards the cuBLAS calls for CHEMV/ZHEMV
+// CHEMV: forwards to cublasChemv. A row-major layout is rejected up front with
+// CUBLAS_STATUS_NOT_SUPPORTED. The complex scalars are copied component-wise into cuComplex
+// values, the offset buffer pointers are reinterpreted as cuComplex pointers, and every size_t
+// quantity (including a_ld) is narrowed to int with an explicit static_cast. Returns the status
+// of cublasChemv; cudaDeviceSynchronize() is called before returning and its result is not checked.
+cublasStatus_t cublasXhemv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+    const size_t n,
+    const float2 alpha,
+    const float2* a_buffer, const size_t a_offset, const size_t a_ld,
+    const float2* x_buffer, const size_t x_offset, const size_t x_inc,
+    const float2 beta,
+    float2* y_buffer, const size_t y_offset, const size_t y_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  cuComplex alpha_cuda;
+  alpha_cuda.x = alpha.real();
+  alpha_cuda.y = alpha.imag();
+  cuComplex beta_cuda;
+  beta_cuda.x = beta.real();
+  beta_cuda.y = beta.imag();
+  const auto status = cublasChemv(handle, triangle,
+      static_cast<int>(n),
+      &alpha_cuda,
+      reinterpret_cast<const cuComplex*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+      reinterpret_cast<const cuComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+      &beta_cuda,
+      reinterpret_cast<cuComplex*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
+  cudaDeviceSynchronize();
+  return status;
+}
+// ZHEMV: forwards to cublasZhemv. A row-major layout is rejected up front with
+// CUBLAS_STATUS_NOT_SUPPORTED. The complex scalars are copied component-wise into cuDoubleComplex
+// values, the offset buffer pointers are reinterpreted as cuDoubleComplex pointers, and every
+// size_t quantity (including a_ld) is narrowed to int with an explicit static_cast. Returns the
+// status of cublasZhemv; cudaDeviceSynchronize() is called before returning, result unchecked.
+cublasStatus_t cublasXhemv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+    const size_t n,
+    const double2 alpha,
+    const double2* a_buffer, const size_t a_offset, const size_t a_ld,
+    const double2* x_buffer, const size_t x_offset, const size_t x_inc,
+    const double2 beta,
+    double2* y_buffer, const size_t y_offset, const size_t y_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  cuDoubleComplex alpha_cuda;
+  alpha_cuda.x = alpha.real();
+  alpha_cuda.y = alpha.imag();
+  cuDoubleComplex beta_cuda;
+  beta_cuda.x = beta.real();
+  beta_cuda.y = beta.imag();
+  const auto status = cublasZhemv(handle, triangle,
+      static_cast<int>(n),
+      &alpha_cuda,
+      reinterpret_cast<const cuDoubleComplex*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+      reinterpret_cast<const cuDoubleComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+      &beta_cuda,
+      reinterpret_cast<cuDoubleComplex*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
+  cudaDeviceSynchronize();
+  return status;
+}
+
+// Forwards the cuBLAS calls for CHBMV/ZHBMV
+// CHBMV: forwards to cublasChbmv. A row-major layout is rejected up front with
+// CUBLAS_STATUS_NOT_SUPPORTED. The complex scalars are copied component-wise into cuComplex
+// values, the offset buffer pointers are reinterpreted as cuComplex pointers, and every size_t
+// quantity (including a_ld) is narrowed to int with an explicit static_cast. Returns the status
+// of cublasChbmv; cudaDeviceSynchronize() is called before returning and its result is not checked.
+cublasStatus_t cublasXhbmv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+    const size_t n, const size_t k,
+    const float2 alpha,
+    const float2* a_buffer, const size_t a_offset, const size_t a_ld,
+    const float2* x_buffer, const size_t x_offset, const size_t x_inc,
+    const float2 beta,
+    float2* y_buffer, const size_t y_offset, const size_t y_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  cuComplex alpha_cuda;
+  alpha_cuda.x = alpha.real();
+  alpha_cuda.y = alpha.imag();
+  cuComplex beta_cuda;
+  beta_cuda.x = beta.real();
+  beta_cuda.y = beta.imag();
+  const auto status = cublasChbmv(handle, triangle,
+      static_cast<int>(n), static_cast<int>(k),
+      &alpha_cuda,
+      reinterpret_cast<const cuComplex*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+      reinterpret_cast<const cuComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+      &beta_cuda,
+      reinterpret_cast<cuComplex*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
+  cudaDeviceSynchronize();
+  return status;
+}
+// ZHBMV: forwards to cublasZhbmv. A row-major layout is rejected up front with
+// CUBLAS_STATUS_NOT_SUPPORTED. The complex scalars are copied component-wise into cuDoubleComplex
+// values, the offset buffer pointers are reinterpreted as cuDoubleComplex pointers, and every
+// size_t quantity (including a_ld) is narrowed to int with an explicit static_cast. Returns the
+// status of cublasZhbmv; cudaDeviceSynchronize() is called before returning, result unchecked.
+cublasStatus_t cublasXhbmv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+    const size_t n, const size_t k,
+    const double2 alpha,
+    const double2* a_buffer, const size_t a_offset, const size_t a_ld,
+    const double2* x_buffer, const size_t x_offset, const size_t x_inc,
+    const double2 beta,
+    double2* y_buffer, const size_t y_offset, const size_t y_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  cuDoubleComplex alpha_cuda;
+  alpha_cuda.x = alpha.real();
+  alpha_cuda.y = alpha.imag();
+  cuDoubleComplex beta_cuda;
+  beta_cuda.x = beta.real();
+  beta_cuda.y = beta.imag();
+  const auto status = cublasZhbmv(handle, triangle,
+      static_cast<int>(n), static_cast<int>(k),
+      &alpha_cuda,
+      reinterpret_cast<const cuDoubleComplex*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+      reinterpret_cast<const cuDoubleComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+      &beta_cuda,
+      reinterpret_cast<cuDoubleComplex*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
+  cudaDeviceSynchronize();
+  return status;
+}
+
+// Forwards the cuBLAS calls for CHPMV/ZHPMV
+// CHPMV: hands the packed-matrix call over to cublasChpmv. Row-major input short-circuits with
+// CUBLAS_STATUS_NOT_SUPPORTED. Returns whatever cublasChpmv reported, after a
+// cudaDeviceSynchronize() whose own result is ignored.
+cublasStatus_t cublasXhpmv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+    const size_t n,
+    const float2 alpha,
+    const float2* ap_buffer, const size_t ap_offset,
+    const float2* x_buffer, const size_t x_offset, const size_t x_inc,
+    const float2 beta,
+    float2* y_buffer, const size_t y_offset, const size_t y_inc) {
+  if (layout == Layout::kRowMajor) {
+    return CUBLAS_STATUS_NOT_SUPPORTED;
+  }
+
+  // Copy the scalars component-wise into cuComplex values
+  cuComplex alpha_native;
+  cuComplex beta_native;
+  alpha_native.x = alpha.real();
+  alpha_native.y = alpha.imag();
+  beta_native.x = beta.real();
+  beta_native.y = beta.imag();
+
+  // Offset the buffers and view them as cuComplex arrays
+  const auto* ap_ptr = reinterpret_cast<const cuComplex*>(&ap_buffer[ap_offset]);
+  const auto* x_ptr = reinterpret_cast<const cuComplex*>(&x_buffer[x_offset]);
+  auto* y_ptr = reinterpret_cast<cuComplex*>(&y_buffer[y_offset]);
+
+  const auto result = cublasChpmv(handle, triangle, static_cast<int>(n),
+      &alpha_native, ap_ptr,
+      x_ptr, static_cast<int>(x_inc),
+      &beta_native,
+      y_ptr, static_cast<int>(y_inc));
+  cudaDeviceSynchronize();
+  return result;
+}
+// ZHPMV: hands the packed-matrix call over to cublasZhpmv. Row-major input short-circuits with
+// CUBLAS_STATUS_NOT_SUPPORTED. Returns whatever cublasZhpmv reported, after a
+// cudaDeviceSynchronize() whose own result is ignored.
+cublasStatus_t cublasXhpmv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+    const size_t n,
+    const double2 alpha,
+    const double2* ap_buffer, const size_t ap_offset,
+    const double2* x_buffer, const size_t x_offset, const size_t x_inc,
+    const double2 beta,
+    double2* y_buffer, const size_t y_offset, const size_t y_inc) {
+  if (layout == Layout::kRowMajor) {
+    return CUBLAS_STATUS_NOT_SUPPORTED;
+  }
+
+  // Copy the scalars component-wise into cuDoubleComplex values
+  cuDoubleComplex alpha_native;
+  cuDoubleComplex beta_native;
+  alpha_native.x = alpha.real();
+  alpha_native.y = alpha.imag();
+  beta_native.x = beta.real();
+  beta_native.y = beta.imag();
+
+  // Offset the buffers and view them as cuDoubleComplex arrays
+  const auto* ap_ptr = reinterpret_cast<const cuDoubleComplex*>(&ap_buffer[ap_offset]);
+  const auto* x_ptr = reinterpret_cast<const cuDoubleComplex*>(&x_buffer[x_offset]);
+  auto* y_ptr = reinterpret_cast<cuDoubleComplex*>(&y_buffer[y_offset]);
+
+  const auto result = cublasZhpmv(handle, triangle, static_cast<int>(n),
+      &alpha_native, ap_ptr,
+      x_ptr, static_cast<int>(x_inc),
+      &beta_native,
+      y_ptr, static_cast<int>(y_inc));
+  cudaDeviceSynchronize();
+  return result;
+}
+
+// Forwards the cuBLAS calls for SSYMV/DSYMV
+// SSYMV: forwards to cublasSsymv. A row-major layout is rejected up front with
+// CUBLAS_STATUS_NOT_SUPPORTED. Offsets are applied to the buffer pointers, and every size_t
+// quantity (including a_ld) is narrowed to int with an explicit static_cast. Returns the status
+// of cublasSsymv; cudaDeviceSynchronize() is called before returning and its result is not checked.
+cublasStatus_t cublasXsymv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+    const size_t n,
+    const float alpha,
+    const float* a_buffer, const size_t a_offset, const size_t a_ld,
+    const float* x_buffer, const size_t x_offset, const size_t x_inc,
+    const float beta,
+    float* y_buffer, const size_t y_offset, const size_t y_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  const auto status = cublasSsymv(handle, triangle,
+      static_cast<int>(n),
+      &alpha,
+      &a_buffer[a_offset], static_cast<int>(a_ld),
+      &x_buffer[x_offset], static_cast<int>(x_inc),
+      &beta,
+      &y_buffer[y_offset], static_cast<int>(y_inc));
+  cudaDeviceSynchronize();
+  return status;
+}
+// DSYMV: forwards to cublasDsymv. A row-major layout is rejected up front with
+// CUBLAS_STATUS_NOT_SUPPORTED. Offsets are applied to the buffer pointers, and every size_t
+// quantity (including a_ld) is narrowed to int with an explicit static_cast. Returns the status
+// of cublasDsymv; cudaDeviceSynchronize() is called before returning and its result is not checked.
+cublasStatus_t cublasXsymv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+    const size_t n,
+    const double alpha,
+    const double* a_buffer, const size_t a_offset, const size_t a_ld,
+    const double* x_buffer, const size_t x_offset, const size_t x_inc,
+    const double beta,
+    double* y_buffer, const size_t y_offset, const size_t y_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  const auto status = cublasDsymv(handle, triangle,
+      static_cast<int>(n),
+      &alpha,
+      &a_buffer[a_offset], static_cast<int>(a_ld),
+      &x_buffer[x_offset], static_cast<int>(x_inc),
+      &beta,
+      &y_buffer[y_offset], static_cast<int>(y_inc));
+  cudaDeviceSynchronize();
+  return status;
+}
+// Half-precision SYMV placeholder: no cuBLAS routine is invoked and none of the arguments are
+// read; the call always reports CUBLAS_STATUS_NOT_SUPPORTED.
+cublasStatus_t cublasXsymv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+    const size_t n,
+    const half alpha,
+    const half* a_buffer, const size_t a_offset, const size_t a_ld,
+    const half* x_buffer, const size_t x_offset, const size_t x_inc,
+    const half beta,
+    half* y_buffer, const size_t y_offset, const size_t y_inc) {
+  return CUBLAS_STATUS_NOT_SUPPORTED;
+}
+
+// Forwards the cuBLAS calls for SSBMV/DSBMV
+// SSBMV: forwards to cublasSsbmv. A row-major layout is rejected up front with
+// CUBLAS_STATUS_NOT_SUPPORTED. Offsets are applied to the buffer pointers, and every size_t
+// quantity (including a_ld) is narrowed to int with an explicit static_cast. Returns the status
+// of cublasSsbmv; cudaDeviceSynchronize() is called before returning and its result is not checked.
+cublasStatus_t cublasXsbmv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+    const size_t n, const size_t k,
+    const float alpha,
+    const float* a_buffer, const size_t a_offset, const size_t a_ld,
+    const float* x_buffer, const size_t x_offset, const size_t x_inc,
+    const float beta,
+    float* y_buffer, const size_t y_offset, const size_t y_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  const auto status = cublasSsbmv(handle, triangle,
+      static_cast<int>(n), static_cast<int>(k),
+      &alpha,
+      &a_buffer[a_offset], static_cast<int>(a_ld),
+      &x_buffer[x_offset], static_cast<int>(x_inc),
+      &beta,
+      &y_buffer[y_offset], static_cast<int>(y_inc));
+  cudaDeviceSynchronize();
+  return status;
+}
+// DSBMV: forwards to cublasDsbmv. A row-major layout is rejected up front with
+// CUBLAS_STATUS_NOT_SUPPORTED. Offsets are applied to the buffer pointers, and every size_t
+// quantity (including a_ld) is narrowed to int with an explicit static_cast. Returns the status
+// of cublasDsbmv; cudaDeviceSynchronize() is called before returning and its result is not checked.
+cublasStatus_t cublasXsbmv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+    const size_t n, const size_t k,
+    const double alpha,
+    const double* a_buffer, const size_t a_offset, const size_t a_ld,
+    const double* x_buffer, const size_t x_offset, const size_t x_inc,
+    const double beta,
+    double* y_buffer, const size_t y_offset, const size_t y_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  const auto status = cublasDsbmv(handle, triangle,
+      static_cast<int>(n), static_cast<int>(k),
+      &alpha,
+      &a_buffer[a_offset], static_cast<int>(a_ld),
+      &x_buffer[x_offset], static_cast<int>(x_inc),
+      &beta,
+      &y_buffer[y_offset], static_cast<int>(y_inc));
+  cudaDeviceSynchronize();
+  return status;
+}
+// Half-precision SBMV placeholder: no cuBLAS routine is invoked and none of the arguments are
+// read; the call always reports CUBLAS_STATUS_NOT_SUPPORTED.
+cublasStatus_t cublasXsbmv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+    const size_t n, const size_t k,
+    const half alpha,
+    const half* a_buffer, const size_t a_offset, const size_t a_ld,
+    const half* x_buffer, const size_t x_offset, const size_t x_inc,
+    const half beta,
+    half* y_buffer, const size_t y_offset, const size_t y_inc) {
+  return CUBLAS_STATUS_NOT_SUPPORTED;
+}
+
+// Forwards the cuBLAS calls for SSPMV/DSPMV
+// SSPMV: hands the packed-matrix call over to cublasSspmv. Row-major input short-circuits with
+// CUBLAS_STATUS_NOT_SUPPORTED. Returns whatever cublasSspmv reported, after a
+// cudaDeviceSynchronize() whose own result is ignored.
+cublasStatus_t cublasXspmv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+    const size_t n,
+    const float alpha,
+    const float* ap_buffer, const size_t ap_offset,
+    const float* x_buffer, const size_t x_offset, const size_t x_inc,
+    const float beta,
+    float* y_buffer, const size_t y_offset, const size_t y_inc) {
+  if (layout == Layout::kRowMajor) {
+    return CUBLAS_STATUS_NOT_SUPPORTED;
+  }
+
+  // Apply the element offsets to the raw buffers
+  const float* ap_ptr = &ap_buffer[ap_offset];
+  const float* x_ptr = &x_buffer[x_offset];
+  float* y_ptr = &y_buffer[y_offset];
+
+  const auto result = cublasSspmv(handle, triangle, static_cast<int>(n),
+      &alpha, ap_ptr,
+      x_ptr, static_cast<int>(x_inc),
+      &beta,
+      y_ptr, static_cast<int>(y_inc));
+  cudaDeviceSynchronize();
+  return result;
+}
+// DSPMV: hands the packed-matrix call over to cublasDspmv. Row-major input short-circuits with
+// CUBLAS_STATUS_NOT_SUPPORTED. Returns whatever cublasDspmv reported, after a
+// cudaDeviceSynchronize() whose own result is ignored.
+cublasStatus_t cublasXspmv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+    const size_t n,
+    const double alpha,
+    const double* ap_buffer, const size_t ap_offset,
+    const double* x_buffer, const size_t x_offset, const size_t x_inc,
+    const double beta,
+    double* y_buffer, const size_t y_offset, const size_t y_inc) {
+  if (layout == Layout::kRowMajor) {
+    return CUBLAS_STATUS_NOT_SUPPORTED;
+  }
+
+  // Apply the element offsets to the raw buffers
+  const double* ap_ptr = &ap_buffer[ap_offset];
+  const double* x_ptr = &x_buffer[x_offset];
+  double* y_ptr = &y_buffer[y_offset];
+
+  const auto result = cublasDspmv(handle, triangle, static_cast<int>(n),
+      &alpha, ap_ptr,
+      x_ptr, static_cast<int>(x_inc),
+      &beta,
+      y_ptr, static_cast<int>(y_inc));
+  cudaDeviceSynchronize();
+  return result;
+}
+// Half-precision SPMV placeholder: no cuBLAS routine is invoked and none of the arguments are
+// read; the call always reports CUBLAS_STATUS_NOT_SUPPORTED.
+cublasStatus_t cublasXspmv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+    const size_t n,
+    const half alpha,
+    const half* ap_buffer, const size_t ap_offset,
+    const half* x_buffer, const size_t x_offset, const size_t x_inc,
+    const half beta,
+    half* y_buffer, const size_t y_offset, const size_t y_inc) {
+  return CUBLAS_STATUS_NOT_SUPPORTED;
+}
+
+// Forwards the cuBLAS calls for STRMV/DTRMV/CTRMV/ZTRMV
+// Primary template: declared only; each supported element type provides an explicit
+// specialization below.
+template <typename T>
+cublasStatus_t cublasXtrmv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+    const size_t n,
+    const T* a_buffer, const size_t a_offset, const size_t a_ld,
+    T* x_buffer, const size_t x_offset, const size_t x_inc);
+
+// STRMV: forwards to cublasStrmv. A row-major layout is rejected up front with
+// CUBLAS_STATUS_NOT_SUPPORTED. Offsets are applied to the buffer pointers, and every size_t
+// quantity (including a_ld) is narrowed to int with an explicit static_cast. Returns the status
+// of cublasStrmv; cudaDeviceSynchronize() is called before returning and its result is not checked.
+template <>
+cublasStatus_t cublasXtrmv<float>(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+    const size_t n,
+    const float* a_buffer, const size_t a_offset, const size_t a_ld,
+    float* x_buffer, const size_t x_offset, const size_t x_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  const auto status = cublasStrmv(handle, triangle, a_transpose, diagonal,
+      static_cast<int>(n),
+      &a_buffer[a_offset], static_cast<int>(a_ld),
+      &x_buffer[x_offset], static_cast<int>(x_inc));
+  cudaDeviceSynchronize();
+  return status;
+}
+// DTRMV: forwards to cublasDtrmv. A row-major layout is rejected up front with
+// CUBLAS_STATUS_NOT_SUPPORTED. Offsets are applied to the buffer pointers, and every size_t
+// quantity (including a_ld) is narrowed to int with an explicit static_cast. Returns the status
+// of cublasDtrmv; cudaDeviceSynchronize() is called before returning and its result is not checked.
+template <>
+cublasStatus_t cublasXtrmv<double>(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+    const size_t n,
+    const double* a_buffer, const size_t a_offset, const size_t a_ld,
+    double* x_buffer, const size_t x_offset, const size_t x_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  const auto status = cublasDtrmv(handle, triangle, a_transpose, diagonal,
+      static_cast<int>(n),
+      &a_buffer[a_offset], static_cast<int>(a_ld),
+      &x_buffer[x_offset], static_cast<int>(x_inc));
+  cudaDeviceSynchronize();
+  return status;
+}
+// CTRMV: forwards to cublasCtrmv. A row-major layout is rejected up front with
+// CUBLAS_STATUS_NOT_SUPPORTED. The offset buffer pointers are reinterpreted as cuComplex
+// pointers, and every size_t quantity (including a_ld) is narrowed to int with an explicit
+// static_cast. Returns the status of cublasCtrmv; cudaDeviceSynchronize() is called before
+// returning and its result is not checked.
+template <>
+cublasStatus_t cublasXtrmv<float2>(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+    const size_t n,
+    const float2* a_buffer, const size_t a_offset, const size_t a_ld,
+    float2* x_buffer, const size_t x_offset, const size_t x_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  const auto status = cublasCtrmv(handle, triangle, a_transpose, diagonal,
+      static_cast<int>(n),
+      reinterpret_cast<const cuComplex*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+      reinterpret_cast<cuComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
+  cudaDeviceSynchronize();
+  return status;
+}
+// ZTRMV: forwards to cublasZtrmv. A row-major layout is rejected up front with
+// CUBLAS_STATUS_NOT_SUPPORTED. The offset buffer pointers are reinterpreted as cuDoubleComplex
+// pointers, and every size_t quantity (including a_ld) is narrowed to int with an explicit
+// static_cast. Returns the status of cublasZtrmv; cudaDeviceSynchronize() is called before
+// returning and its result is not checked.
+template <>
+cublasStatus_t cublasXtrmv<double2>(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+    const size_t n,
+    const double2* a_buffer, const size_t a_offset, const size_t a_ld,
+    double2* x_buffer, const size_t x_offset, const size_t x_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  const auto status = cublasZtrmv(handle, triangle, a_transpose, diagonal,
+      static_cast<int>(n),
+      reinterpret_cast<const cuDoubleComplex*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+      reinterpret_cast<cuDoubleComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
+  cudaDeviceSynchronize();
+  return status;
+}
+// Half-precision TRMV placeholder: no cuBLAS routine is invoked and none of the arguments are
+// read; the call always reports CUBLAS_STATUS_NOT_SUPPORTED.
+template <>
+cublasStatus_t cublasXtrmv<half>(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+    const size_t n,
+    const half* a_buffer, const size_t a_offset, const size_t a_ld,
+    half* x_buffer, const size_t x_offset, const size_t x_inc) {
+  return CUBLAS_STATUS_NOT_SUPPORTED;
+}
+
+// Forwards the cuBLAS calls for STBMV/DTBMV/CTBMV/ZTBMV
+// Primary template: declared only; each supported element type provides an explicit
+// specialization below.
+template <typename T>
+cublasStatus_t cublasXtbmv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+    const size_t n, const size_t k,
+    const T* a_buffer, const size_t a_offset, const size_t a_ld,
+    T* x_buffer, const size_t x_offset, const size_t x_inc);
+
+// STBMV: forwards to cublasStbmv. A row-major layout is rejected up front with
+// CUBLAS_STATUS_NOT_SUPPORTED. Offsets are applied to the buffer pointers, and every size_t
+// quantity (including a_ld) is narrowed to int with an explicit static_cast. Returns the status
+// of cublasStbmv; cudaDeviceSynchronize() is called before returning and its result is not checked.
+template <>
+cublasStatus_t cublasXtbmv<float>(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+    const size_t n, const size_t k,
+    const float* a_buffer, const size_t a_offset, const size_t a_ld,
+    float* x_buffer, const size_t x_offset, const size_t x_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  const auto status = cublasStbmv(handle, triangle, a_transpose, diagonal,
+      static_cast<int>(n), static_cast<int>(k),
+      &a_buffer[a_offset], static_cast<int>(a_ld),
+      &x_buffer[x_offset], static_cast<int>(x_inc));
+  cudaDeviceSynchronize();
+  return status;
+}
+// DTBMV: forwards to cublasDtbmv. A row-major layout is rejected up front with
+// CUBLAS_STATUS_NOT_SUPPORTED. Offsets are applied to the buffer pointers, and every size_t
+// quantity (including a_ld) is narrowed to int with an explicit static_cast. Returns the status
+// of cublasDtbmv; cudaDeviceSynchronize() is called before returning and its result is not checked.
+template <>
+cublasStatus_t cublasXtbmv<double>(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+    const size_t n, const size_t k,
+    const double* a_buffer, const size_t a_offset, const size_t a_ld,
+    double* x_buffer, const size_t x_offset, const size_t x_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  const auto status = cublasDtbmv(handle, triangle, a_transpose, diagonal,
+      static_cast<int>(n), static_cast<int>(k),
+      &a_buffer[a_offset], static_cast<int>(a_ld),
+      &x_buffer[x_offset], static_cast<int>(x_inc));
+  cudaDeviceSynchronize();
+  return status;
+}
+// CTBMV: forwards to cublasCtbmv. A row-major layout is rejected up front with
+// CUBLAS_STATUS_NOT_SUPPORTED. The offset buffer pointers are reinterpreted as cuComplex
+// pointers, and every size_t quantity (including a_ld) is narrowed to int with an explicit
+// static_cast. Returns the status of cublasCtbmv; cudaDeviceSynchronize() is called before
+// returning and its result is not checked.
+template <>
+cublasStatus_t cublasXtbmv<float2>(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+    const size_t n, const size_t k,
+    const float2* a_buffer, const size_t a_offset, const size_t a_ld,
+    float2* x_buffer, const size_t x_offset, const size_t x_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  const auto status = cublasCtbmv(handle, triangle, a_transpose, diagonal,
+      static_cast<int>(n), static_cast<int>(k),
+      reinterpret_cast<const cuComplex*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+      reinterpret_cast<cuComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
+  cudaDeviceSynchronize();
+  return status;
+}
+// ZTBMV: forwards to cublasZtbmv. A row-major layout is rejected up front with
+// CUBLAS_STATUS_NOT_SUPPORTED. The offset buffer pointers are reinterpreted as cuDoubleComplex
+// pointers, and every size_t quantity (including a_ld) is narrowed to int with an explicit
+// static_cast. Returns the status of cublasZtbmv; cudaDeviceSynchronize() is called before
+// returning and its result is not checked.
+template <>
+cublasStatus_t cublasXtbmv<double2>(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+    const size_t n, const size_t k,
+    const double2* a_buffer, const size_t a_offset, const size_t a_ld,
+    double2* x_buffer, const size_t x_offset, const size_t x_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  const auto status = cublasZtbmv(handle, triangle, a_transpose, diagonal,
+      static_cast<int>(n), static_cast<int>(k),
+      reinterpret_cast<const cuDoubleComplex*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+      reinterpret_cast<cuDoubleComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
+  cudaDeviceSynchronize();
+  return status;
+}
+// Half-precision TBMV placeholder: no cuBLAS routine is invoked and none of the arguments are
+// read; the call always reports CUBLAS_STATUS_NOT_SUPPORTED.
+template <>
+cublasStatus_t cublasXtbmv<half>(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+    const size_t n, const size_t k,
+    const half* a_buffer, const size_t a_offset, const size_t a_ld,
+    half* x_buffer, const size_t x_offset, const size_t x_inc) {
+  return CUBLAS_STATUS_NOT_SUPPORTED;
+}
+
+// Forwards the cuBLAS calls for STPMV/DTPMV/CTPMV/ZTPMV
+// Primary template: declared only; each supported element type provides an explicit
+// specialization below.
+template <typename T>
+cublasStatus_t cublasXtpmv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+    const size_t n,
+    const T* ap_buffer, const size_t ap_offset,
+    T* x_buffer, const size_t x_offset, const size_t x_inc);
+
+// STPMV: hands the packed-matrix call over to cublasStpmv. Row-major input short-circuits with
+// CUBLAS_STATUS_NOT_SUPPORTED. Returns whatever cublasStpmv reported, after a
+// cudaDeviceSynchronize() whose own result is ignored.
+template <>
+cublasStatus_t cublasXtpmv<float>(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+    const size_t n,
+    const float* ap_buffer, const size_t ap_offset,
+    float* x_buffer, const size_t x_offset, const size_t x_inc) {
+  if (layout == Layout::kRowMajor) {
+    return CUBLAS_STATUS_NOT_SUPPORTED;
+  }
+
+  // Apply the element offsets to the raw buffers
+  const float* ap_ptr = &ap_buffer[ap_offset];
+  float* x_ptr = &x_buffer[x_offset];
+
+  const auto result = cublasStpmv(handle, triangle, a_transpose, diagonal,
+      static_cast<int>(n), ap_ptr,
+      x_ptr, static_cast<int>(x_inc));
+  cudaDeviceSynchronize();
+  return result;
+}
+// DTPMV: hands the packed-matrix call over to cublasDtpmv. Row-major input short-circuits with
+// CUBLAS_STATUS_NOT_SUPPORTED. Returns whatever cublasDtpmv reported, after a
+// cudaDeviceSynchronize() whose own result is ignored.
+template <>
+cublasStatus_t cublasXtpmv<double>(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+    const size_t n,
+    const double* ap_buffer, const size_t ap_offset,
+    double* x_buffer, const size_t x_offset, const size_t x_inc) {
+  if (layout == Layout::kRowMajor) {
+    return CUBLAS_STATUS_NOT_SUPPORTED;
+  }
+
+  // Apply the element offsets to the raw buffers
+  const double* ap_ptr = &ap_buffer[ap_offset];
+  double* x_ptr = &x_buffer[x_offset];
+
+  const auto result = cublasDtpmv(handle, triangle, a_transpose, diagonal,
+      static_cast<int>(n), ap_ptr,
+      x_ptr, static_cast<int>(x_inc));
+  cudaDeviceSynchronize();
+  return result;
+}
+// CTPMV: hands the packed-matrix call over to cublasCtpmv. Row-major input short-circuits with
+// CUBLAS_STATUS_NOT_SUPPORTED. Returns whatever cublasCtpmv reported, after a
+// cudaDeviceSynchronize() whose own result is ignored.
+template <>
+cublasStatus_t cublasXtpmv<float2>(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+    const size_t n,
+    const float2* ap_buffer, const size_t ap_offset,
+    float2* x_buffer, const size_t x_offset, const size_t x_inc) {
+  if (layout == Layout::kRowMajor) {
+    return CUBLAS_STATUS_NOT_SUPPORTED;
+  }
+
+  // Offset the buffers and view them as cuComplex arrays
+  const auto* ap_ptr = reinterpret_cast<const cuComplex*>(&ap_buffer[ap_offset]);
+  auto* x_ptr = reinterpret_cast<cuComplex*>(&x_buffer[x_offset]);
+
+  const auto result = cublasCtpmv(handle, triangle, a_transpose, diagonal,
+      static_cast<int>(n), ap_ptr,
+      x_ptr, static_cast<int>(x_inc));
+  cudaDeviceSynchronize();
+  return result;
+}
+// ZTPMV: hands the packed-matrix call over to cublasZtpmv. Row-major input short-circuits with
+// CUBLAS_STATUS_NOT_SUPPORTED. Returns whatever cublasZtpmv reported, after a
+// cudaDeviceSynchronize() whose own result is ignored.
+template <>
+cublasStatus_t cublasXtpmv<double2>(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+    const size_t n,
+    const double2* ap_buffer, const size_t ap_offset,
+    double2* x_buffer, const size_t x_offset, const size_t x_inc) {
+  if (layout == Layout::kRowMajor) {
+    return CUBLAS_STATUS_NOT_SUPPORTED;
+  }
+
+  // Offset the buffers and view them as cuDoubleComplex arrays
+  const auto* ap_ptr = reinterpret_cast<const cuDoubleComplex*>(&ap_buffer[ap_offset]);
+  auto* x_ptr = reinterpret_cast<cuDoubleComplex*>(&x_buffer[x_offset]);
+
+  const auto result = cublasZtpmv(handle, triangle, a_transpose, diagonal,
+      static_cast<int>(n), ap_ptr,
+      x_ptr, static_cast<int>(x_inc));
+  cudaDeviceSynchronize();
+  return result;
+}
+// Half-precision TPMV placeholder: no cuBLAS routine is invoked and none of the arguments are
+// read; the call always reports CUBLAS_STATUS_NOT_SUPPORTED.
+template <>
+cublasStatus_t cublasXtpmv<half>(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+    const size_t n,
+    const half* ap_buffer, const size_t ap_offset,
+    half* x_buffer, const size_t x_offset, const size_t x_inc) {
+  return CUBLAS_STATUS_NOT_SUPPORTED;
+}
+
+// Forwards the cuBLAS calls for STRSV/DTRSV/CTRSV/ZTRSV
+// Primary template: declared only; each supported element type provides an explicit
+// specialization below.
+template <typename T>
+cublasStatus_t cublasXtrsv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+    const size_t n,
+    const T* a_buffer, const size_t a_offset, const size_t a_ld,
+    T* x_buffer, const size_t x_offset, const size_t x_inc);
+
+// STRSV: forwards to cublasStrsv. A row-major layout is rejected up front with
+// CUBLAS_STATUS_NOT_SUPPORTED. Offsets are applied to the buffer pointers, and every size_t
+// quantity (including a_ld) is narrowed to int with an explicit static_cast. Returns the status
+// of cublasStrsv; cudaDeviceSynchronize() is called before returning and its result is not checked.
+template <>
+cublasStatus_t cublasXtrsv<float>(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+    const size_t n,
+    const float* a_buffer, const size_t a_offset, const size_t a_ld,
+    float* x_buffer, const size_t x_offset, const size_t x_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  const auto status = cublasStrsv(handle, triangle, a_transpose, diagonal,
+      static_cast<int>(n),
+      &a_buffer[a_offset], static_cast<int>(a_ld),
+      &x_buffer[x_offset], static_cast<int>(x_inc));
+  cudaDeviceSynchronize();
+  return status;
+}
+// DTRSV: forwards to cublasDtrsv. A row-major layout is rejected up front with
+// CUBLAS_STATUS_NOT_SUPPORTED. Offsets are applied to the buffer pointers, and every size_t
+// quantity (including a_ld) is narrowed to int with an explicit static_cast. Returns the status
+// of cublasDtrsv; cudaDeviceSynchronize() is called before returning and its result is not checked.
+template <>
+cublasStatus_t cublasXtrsv<double>(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+    const size_t n,
+    const double* a_buffer, const size_t a_offset, const size_t a_ld,
+    double* x_buffer, const size_t x_offset, const size_t x_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  const auto status = cublasDtrsv(handle, triangle, a_transpose, diagonal,
+      static_cast<int>(n),
+      &a_buffer[a_offset], static_cast<int>(a_ld),
+      &x_buffer[x_offset], static_cast<int>(x_inc));
+  cudaDeviceSynchronize();
+  return status;
+}
+// CTRSV: forwards to cublasCtrsv. A row-major layout is rejected up front with
+// CUBLAS_STATUS_NOT_SUPPORTED. The offset buffer pointers are reinterpreted as cuComplex
+// pointers, and every size_t quantity (including a_ld) is narrowed to int with an explicit
+// static_cast. Returns the status of cublasCtrsv; cudaDeviceSynchronize() is called before
+// returning and its result is not checked.
+template <>
+cublasStatus_t cublasXtrsv<float2>(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+    const size_t n,
+    const float2* a_buffer, const size_t a_offset, const size_t a_ld,
+    float2* x_buffer, const size_t x_offset, const size_t x_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  const auto status = cublasCtrsv(handle, triangle, a_transpose, diagonal,
+      static_cast<int>(n),
+      reinterpret_cast<const cuComplex*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+      reinterpret_cast<cuComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
+  cudaDeviceSynchronize();
+  return status;
+}
+// ZTRSV: forwards to cublasZtrsv. A row-major layout is rejected up front with
+// CUBLAS_STATUS_NOT_SUPPORTED. The offset buffer pointers are reinterpreted as cuDoubleComplex
+// pointers, and every size_t quantity (including a_ld) is narrowed to int with an explicit
+// static_cast. Returns the status of cublasZtrsv; cudaDeviceSynchronize() is called before
+// returning and its result is not checked.
+template <>
+cublasStatus_t cublasXtrsv<double2>(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+    const size_t n,
+    const double2* a_buffer, const size_t a_offset, const size_t a_ld,
+    double2* x_buffer, const size_t x_offset, const size_t x_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  const auto status = cublasZtrsv(handle, triangle, a_transpose, diagonal,
+      static_cast<int>(n),
+      reinterpret_cast<const cuDoubleComplex*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+      reinterpret_cast<cuDoubleComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
+  cudaDeviceSynchronize();
+  return status;
+}
+
+// Forwards the cuBLAS calls for STBSV/DTBSV/CTBSV/ZTBSV
+// Primary template: declared only; each supported element type provides an explicit
+// specialization below.
+template <typename T>
+cublasStatus_t cublasXtbsv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+    const size_t n, const size_t k,
+    const T* a_buffer, const size_t a_offset, const size_t a_ld,
+    T* x_buffer, const size_t x_offset, const size_t x_inc);
+
+// STBSV: forwards to cublasStbsv. A row-major layout is rejected up front with
+// CUBLAS_STATUS_NOT_SUPPORTED. Offsets are applied to the buffer pointers, and every size_t
+// quantity (including a_ld) is narrowed to int with an explicit static_cast. Returns the status
+// of cublasStbsv; cudaDeviceSynchronize() is called before returning and its result is not checked.
+template <>
+cublasStatus_t cublasXtbsv<float>(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+    const size_t n, const size_t k,
+    const float* a_buffer, const size_t a_offset, const size_t a_ld,
+    float* x_buffer, const size_t x_offset, const size_t x_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  const auto status = cublasStbsv(handle, triangle, a_transpose, diagonal,
+      static_cast<int>(n), static_cast<int>(k),
+      &a_buffer[a_offset], static_cast<int>(a_ld),
+      &x_buffer[x_offset], static_cast<int>(x_inc));
+  cudaDeviceSynchronize();
+  return status;
+}
+// DTBSV: forwards to cublasDtbsv. A row-major layout is rejected up front with
+// CUBLAS_STATUS_NOT_SUPPORTED. Offsets are applied to the buffer pointers, and every size_t
+// quantity (including a_ld) is narrowed to int with an explicit static_cast. Returns the status
+// of cublasDtbsv; cudaDeviceSynchronize() is called before returning and its result is not checked.
+template <>
+cublasStatus_t cublasXtbsv<double>(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+    const size_t n, const size_t k,
+    const double* a_buffer, const size_t a_offset, const size_t a_ld,
+    double* x_buffer, const size_t x_offset, const size_t x_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  const auto status = cublasDtbsv(handle, triangle, a_transpose, diagonal,
+      static_cast<int>(n), static_cast<int>(k),
+      &a_buffer[a_offset], static_cast<int>(a_ld),
+      &x_buffer[x_offset], static_cast<int>(x_inc));
+  cudaDeviceSynchronize();
+  return status;
+}
+// CTBSV: forwards to cublasCtbsv. A row-major layout is rejected up front with
+// CUBLAS_STATUS_NOT_SUPPORTED. The offset buffer pointers are reinterpreted as cuComplex
+// pointers, and every size_t quantity (including a_ld) is narrowed to int with an explicit
+// static_cast. Returns the status of cublasCtbsv; cudaDeviceSynchronize() is called before
+// returning and its result is not checked.
+template <>
+cublasStatus_t cublasXtbsv<float2>(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+    const size_t n, const size_t k,
+    const float2* a_buffer, const size_t a_offset, const size_t a_ld,
+    float2* x_buffer, const size_t x_offset, const size_t x_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  const auto status = cublasCtbsv(handle, triangle, a_transpose, diagonal,
+      static_cast<int>(n), static_cast<int>(k),
+      reinterpret_cast<const cuComplex*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+      reinterpret_cast<cuComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
+  cudaDeviceSynchronize();
+  return status;
+}
+// ZTBSV: forwards to cublasZtbsv. A row-major layout is rejected up front with
+// CUBLAS_STATUS_NOT_SUPPORTED. The offset buffer pointers are reinterpreted as cuDoubleComplex
+// pointers, and every size_t quantity (including a_ld) is narrowed to int with an explicit
+// static_cast. Returns the status of cublasZtbsv; cudaDeviceSynchronize() is called before
+// returning and its result is not checked.
+template <>
+cublasStatus_t cublasXtbsv<double2>(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+    const size_t n, const size_t k,
+    const double2* a_buffer, const size_t a_offset, const size_t a_ld,
+    double2* x_buffer, const size_t x_offset, const size_t x_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  const auto status = cublasZtbsv(handle, triangle, a_transpose, diagonal,
+      static_cast<int>(n), static_cast<int>(k),
+      reinterpret_cast<const cuDoubleComplex*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+      reinterpret_cast<cuDoubleComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
+  cudaDeviceSynchronize();
+  return status;
+}
+
+// Forwards the cuBLAS calls for STPSV/DTPSV/CTPSV/ZTPSV
+template <typename T>  // primary template: declaration only, no generic body is provided here
+cublasStatus_t cublasXtpsv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+ const size_t n,
+ const T* ap_buffer, const size_t ap_offset,  // read-only buffer plus an element offset into it
+ T* x_buffer, const size_t x_offset, const size_t x_inc);  // writable buffer, element offset and increment
+// float specialization: rejects row-major layouts with CUBLAS_STATUS_NOT_SUPPORTED, otherwise
+// calls cublasStpsv on the buffers advanced by their offsets, synchronizes the device and
+// returns the status reported by cuBLAS.
+template <>
+cublasStatus_t cublasXtpsv<float>(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+ const size_t n,
+ const float* ap_buffer, const size_t ap_offset,
+ float* x_buffer, const size_t x_offset, const size_t x_inc) {
+  if (layout == Layout::kRowMajor) {
+    return CUBLAS_STATUS_NOT_SUPPORTED;
+  }
+  const float* ap_start = ap_buffer + ap_offset;
+  float* x_start = x_buffer + x_offset;
+  const auto result = cublasStpsv(
+      handle, triangle, a_transpose, diagonal,
+      static_cast<int>(n),
+      ap_start,
+      x_start, static_cast<int>(x_inc));
+  cudaDeviceSynchronize();  // the synchronization result itself is not inspected
+  return result;
+}
+// double specialization: rejects row-major layouts with CUBLAS_STATUS_NOT_SUPPORTED, otherwise
+// calls cublasDtpsv on the buffers advanced by their offsets, synchronizes the device and
+// returns the status reported by cuBLAS.
+template <>
+cublasStatus_t cublasXtpsv<double>(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+ const size_t n,
+ const double* ap_buffer, const size_t ap_offset,
+ double* x_buffer, const size_t x_offset, const size_t x_inc) {
+  if (layout == Layout::kRowMajor) {
+    return CUBLAS_STATUS_NOT_SUPPORTED;
+  }
+  const double* ap_start = ap_buffer + ap_offset;
+  double* x_start = x_buffer + x_offset;
+  const auto result = cublasDtpsv(
+      handle, triangle, a_transpose, diagonal,
+      static_cast<int>(n),
+      ap_start,
+      x_start, static_cast<int>(x_inc));
+  cudaDeviceSynchronize();  // the synchronization result itself is not inspected
+  return result;
+}
+// float2 specialization: rejects row-major layouts with CUBLAS_STATUS_NOT_SUPPORTED, otherwise
+// calls cublasCtpsv on the buffers advanced by their offsets, synchronizes the device and
+// returns the status reported by cuBLAS.
+template <>
+cublasStatus_t cublasXtpsv<float2>(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+ const size_t n,
+ const float2* ap_buffer, const size_t ap_offset,
+ float2* x_buffer, const size_t x_offset, const size_t x_inc) {
+  if (layout == Layout::kRowMajor) {
+    return CUBLAS_STATUS_NOT_SUPPORTED;
+  }
+  const auto* ap_start = reinterpret_cast<const cuComplex*>(ap_buffer + ap_offset);
+  auto* x_start = reinterpret_cast<cuComplex*>(x_buffer + x_offset);
+  const auto result = cublasCtpsv(
+      handle, triangle, a_transpose, diagonal,
+      static_cast<int>(n),
+      ap_start,
+      x_start, static_cast<int>(x_inc));
+  cudaDeviceSynchronize();  // the synchronization result itself is not inspected
+  return result;
+}
+// double2 specialization: rejects row-major layouts with CUBLAS_STATUS_NOT_SUPPORTED, otherwise
+// calls cublasZtpsv on the buffers advanced by their offsets, synchronizes the device and
+// returns the status reported by cuBLAS.
+template <>
+cublasStatus_t cublasXtpsv<double2>(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+ const size_t n,
+ const double2* ap_buffer, const size_t ap_offset,
+ double2* x_buffer, const size_t x_offset, const size_t x_inc) {
+  if (layout == Layout::kRowMajor) {
+    return CUBLAS_STATUS_NOT_SUPPORTED;
+  }
+  const auto* ap_start = reinterpret_cast<const cuDoubleComplex*>(ap_buffer + ap_offset);
+  auto* x_start = reinterpret_cast<cuDoubleComplex*>(x_buffer + x_offset);
+  const auto result = cublasZtpsv(
+      handle, triangle, a_transpose, diagonal,
+      static_cast<int>(n),
+      ap_start,
+      x_start, static_cast<int>(x_inc));
+  cudaDeviceSynchronize();  // the synchronization result itself is not inspected
+  return result;
+}
+
+// Forwards the cuBLAS calls for SGER/DGER
+// float overload: rejects row-major layouts with CUBLAS_STATUS_NOT_SUPPORTED, otherwise calls
+// cublasSger on the buffers advanced by their offsets, synchronizes the device and returns the
+// status reported by cuBLAS.
+cublasStatus_t cublasXger(cublasHandle_t handle, const Layout layout,
+ const size_t m, const size_t n,
+ const float alpha,
+ const float* x_buffer, const size_t x_offset, const size_t x_inc,
+ const float* y_buffer, const size_t y_offset, const size_t y_inc,
+ float* a_buffer, const size_t a_offset, const size_t a_ld) {
+  if (layout == Layout::kRowMajor) {
+    return CUBLAS_STATUS_NOT_SUPPORTED;
+  }
+  const float* x_start = x_buffer + x_offset;
+  const float* y_start = y_buffer + y_offset;
+  float* a_start = a_buffer + a_offset;
+  // a_ld is narrowed explicitly, consistent with the other size_t-to-int conversions below
+  const auto result = cublasSger(
+      handle, static_cast<int>(m), static_cast<int>(n),
+      &alpha,
+      x_start, static_cast<int>(x_inc),
+      y_start, static_cast<int>(y_inc),
+      a_start, static_cast<int>(a_ld));
+  cudaDeviceSynchronize();  // the synchronization result itself is not inspected
+  return result;
+}
+// double overload: rejects row-major layouts with CUBLAS_STATUS_NOT_SUPPORTED, otherwise calls
+// cublasDger on the buffers advanced by their offsets, synchronizes the device and returns the
+// status reported by cuBLAS.
+cublasStatus_t cublasXger(cublasHandle_t handle, const Layout layout,
+ const size_t m, const size_t n,
+ const double alpha,
+ const double* x_buffer, const size_t x_offset, const size_t x_inc,
+ const double* y_buffer, const size_t y_offset, const size_t y_inc,
+ double* a_buffer, const size_t a_offset, const size_t a_ld) {
+  if (layout == Layout::kRowMajor) {
+    return CUBLAS_STATUS_NOT_SUPPORTED;
+  }
+  const double* x_start = x_buffer + x_offset;
+  const double* y_start = y_buffer + y_offset;
+  double* a_start = a_buffer + a_offset;
+  // a_ld is narrowed explicitly, consistent with the other size_t-to-int conversions below
+  const auto result = cublasDger(
+      handle, static_cast<int>(m), static_cast<int>(n),
+      &alpha,
+      x_start, static_cast<int>(x_inc),
+      y_start, static_cast<int>(y_inc),
+      a_start, static_cast<int>(a_ld));
+  cudaDeviceSynchronize();  // the synchronization result itself is not inspected
+  return result;
+}
+cublasStatus_t cublasXger(cublasHandle_t handle, const Layout layout,
+ const size_t m, const size_t n,
+ const half alpha,  // half overload: none of the arguments is read
+ const half* x_buffer, const size_t x_offset, const size_t x_inc,
+ const half* y_buffer, const size_t y_offset, const size_t y_inc,
+ half* a_buffer, const size_t a_offset, const size_t a_ld) {
+ return CUBLAS_STATUS_NOT_SUPPORTED;  // unconditional: no cuBLAS routine is invoked
+}
+
+// Forwards the cuBLAS calls for CGERU/ZGERU
+// float2 overload: rejects row-major layouts with CUBLAS_STATUS_NOT_SUPPORTED, otherwise calls
+// cublasCgeru on the buffers advanced by their offsets, synchronizes the device and returns the
+// status reported by cuBLAS.
+cublasStatus_t cublasXgeru(cublasHandle_t handle, const Layout layout,
+ const size_t m, const size_t n,
+ const float2 alpha,
+ const float2* x_buffer, const size_t x_offset, const size_t x_inc,
+ const float2* y_buffer, const size_t y_offset, const size_t y_inc,
+ float2* a_buffer, const size_t a_offset, const size_t a_ld) {
+  if (layout == Layout::kRowMajor) {
+    return CUBLAS_STATUS_NOT_SUPPORTED;
+  }
+  // Repack the scalar into the CUDA complex type (x = real part, y = imaginary part)
+  const cuComplex alpha_cuda{alpha.real(), alpha.imag()};
+  const auto* x_start = reinterpret_cast<const cuComplex*>(x_buffer + x_offset);
+  const auto* y_start = reinterpret_cast<const cuComplex*>(y_buffer + y_offset);
+  auto* a_start = reinterpret_cast<cuComplex*>(a_buffer + a_offset);
+  // a_ld is narrowed explicitly, consistent with the other size_t-to-int conversions below
+  const auto result = cublasCgeru(
+      handle, static_cast<int>(m), static_cast<int>(n),
+      &alpha_cuda,
+      x_start, static_cast<int>(x_inc),
+      y_start, static_cast<int>(y_inc),
+      a_start, static_cast<int>(a_ld));
+  cudaDeviceSynchronize();  // the synchronization result itself is not inspected
+  return result;
+}
+// double2 overload: rejects row-major layouts with CUBLAS_STATUS_NOT_SUPPORTED, otherwise calls
+// cublasZgeru on the buffers advanced by their offsets, synchronizes the device and returns the
+// status reported by cuBLAS.
+cublasStatus_t cublasXgeru(cublasHandle_t handle, const Layout layout,
+ const size_t m, const size_t n,
+ const double2 alpha,
+ const double2* x_buffer, const size_t x_offset, const size_t x_inc,
+ const double2* y_buffer, const size_t y_offset, const size_t y_inc,
+ double2* a_buffer, const size_t a_offset, const size_t a_ld) {
+  if (layout == Layout::kRowMajor) {
+    return CUBLAS_STATUS_NOT_SUPPORTED;
+  }
+  // Repack the scalar into the CUDA complex type (x = real part, y = imaginary part)
+  const cuDoubleComplex alpha_cuda{alpha.real(), alpha.imag()};
+  const auto* x_start = reinterpret_cast<const cuDoubleComplex*>(x_buffer + x_offset);
+  const auto* y_start = reinterpret_cast<const cuDoubleComplex*>(y_buffer + y_offset);
+  auto* a_start = reinterpret_cast<cuDoubleComplex*>(a_buffer + a_offset);
+  // a_ld is narrowed explicitly, consistent with the other size_t-to-int conversions below
+  const auto result = cublasZgeru(
+      handle, static_cast<int>(m), static_cast<int>(n),
+      &alpha_cuda,
+      x_start, static_cast<int>(x_inc),
+      y_start, static_cast<int>(y_inc),
+      a_start, static_cast<int>(a_ld));
+  cudaDeviceSynchronize();  // the synchronization result itself is not inspected
+  return result;
+}
+
+// Forwards the cuBLAS calls for CGERC/ZGERC
+// float2 overload: rejects row-major layouts with CUBLAS_STATUS_NOT_SUPPORTED, otherwise calls
+// cublasCgerc on the buffers advanced by their offsets, synchronizes the device and returns the
+// status reported by cuBLAS.
+cublasStatus_t cublasXgerc(cublasHandle_t handle, const Layout layout,
+ const size_t m, const size_t n,
+ const float2 alpha,
+ const float2* x_buffer, const size_t x_offset, const size_t x_inc,
+ const float2* y_buffer, const size_t y_offset, const size_t y_inc,
+ float2* a_buffer, const size_t a_offset, const size_t a_ld) {
+  if (layout == Layout::kRowMajor) {
+    return CUBLAS_STATUS_NOT_SUPPORTED;
+  }
+  // Repack the scalar into the CUDA complex type (x = real part, y = imaginary part)
+  const cuComplex alpha_cuda{alpha.real(), alpha.imag()};
+  const auto* x_start = reinterpret_cast<const cuComplex*>(x_buffer + x_offset);
+  const auto* y_start = reinterpret_cast<const cuComplex*>(y_buffer + y_offset);
+  auto* a_start = reinterpret_cast<cuComplex*>(a_buffer + a_offset);
+  // a_ld is narrowed explicitly, consistent with the other size_t-to-int conversions below
+  const auto result = cublasCgerc(
+      handle, static_cast<int>(m), static_cast<int>(n),
+      &alpha_cuda,
+      x_start, static_cast<int>(x_inc),
+      y_start, static_cast<int>(y_inc),
+      a_start, static_cast<int>(a_ld));
+  cudaDeviceSynchronize();  // the synchronization result itself is not inspected
+  return result;
+}
+// double2 overload: rejects row-major layouts with CUBLAS_STATUS_NOT_SUPPORTED, otherwise calls
+// cublasZgerc on the buffers advanced by their offsets, synchronizes the device and returns the
+// status reported by cuBLAS.
+cublasStatus_t cublasXgerc(cublasHandle_t handle, const Layout layout,
+ const size_t m, const size_t n,
+ const double2 alpha,
+ const double2* x_buffer, const size_t x_offset, const size_t x_inc,
+ const double2* y_buffer, const size_t y_offset, const size_t y_inc,
+ double2* a_buffer, const size_t a_offset, const size_t a_ld) {
+  if (layout == Layout::kRowMajor) {
+    return CUBLAS_STATUS_NOT_SUPPORTED;
+  }
+  // Repack the scalar into the CUDA complex type (x = real part, y = imaginary part)
+  const cuDoubleComplex alpha_cuda{alpha.real(), alpha.imag()};
+  const auto* x_start = reinterpret_cast<const cuDoubleComplex*>(x_buffer + x_offset);
+  const auto* y_start = reinterpret_cast<const cuDoubleComplex*>(y_buffer + y_offset);
+  auto* a_start = reinterpret_cast<cuDoubleComplex*>(a_buffer + a_offset);
+  // a_ld is narrowed explicitly, consistent with the other size_t-to-int conversions below
+  const auto result = cublasZgerc(
+      handle, static_cast<int>(m), static_cast<int>(n),
+      &alpha_cuda,
+      x_start, static_cast<int>(x_inc),
+      y_start, static_cast<int>(y_inc),
+      a_start, static_cast<int>(a_ld));
+  cudaDeviceSynchronize();  // the synchronization result itself is not inspected
+  return result;
+}
+
+// Forwards the cuBLAS calls for CHER/ZHER
+// float2 overload (real float alpha): rejects row-major layouts with
+// CUBLAS_STATUS_NOT_SUPPORTED, otherwise calls cublasCher on the buffers advanced by their
+// offsets, synchronizes the device and returns the status reported by cuBLAS.
+cublasStatus_t cublasXher(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+ const size_t n,
+ const float alpha,
+ const float2* x_buffer, const size_t x_offset, const size_t x_inc,
+ float2* a_buffer, const size_t a_offset, const size_t a_ld) {
+  if (layout == Layout::kRowMajor) {
+    return CUBLAS_STATUS_NOT_SUPPORTED;
+  }
+  const auto* x_start = reinterpret_cast<const cuComplex*>(x_buffer + x_offset);
+  auto* a_start = reinterpret_cast<cuComplex*>(a_buffer + a_offset);
+  // a_ld is narrowed explicitly, consistent with the other size_t-to-int conversions below
+  const auto result = cublasCher(
+      handle, triangle,
+      static_cast<int>(n),
+      &alpha,
+      x_start, static_cast<int>(x_inc),
+      a_start, static_cast<int>(a_ld));
+  cudaDeviceSynchronize();  // the synchronization result itself is not inspected
+  return result;
+}
+// double2 overload (real double alpha): rejects row-major layouts with
+// CUBLAS_STATUS_NOT_SUPPORTED, otherwise calls cublasZher on the buffers advanced by their
+// offsets, synchronizes the device and returns the status reported by cuBLAS.
+cublasStatus_t cublasXher(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+ const size_t n,
+ const double alpha,
+ const double2* x_buffer, const size_t x_offset, const size_t x_inc,
+ double2* a_buffer, const size_t a_offset, const size_t a_ld) {
+  if (layout == Layout::kRowMajor) {
+    return CUBLAS_STATUS_NOT_SUPPORTED;
+  }
+  const auto* x_start = reinterpret_cast<const cuDoubleComplex*>(x_buffer + x_offset);
+  auto* a_start = reinterpret_cast<cuDoubleComplex*>(a_buffer + a_offset);
+  // a_ld is narrowed explicitly, consistent with the other size_t-to-int conversions below
+  const auto result = cublasZher(
+      handle, triangle,
+      static_cast<int>(n),
+      &alpha,
+      x_start, static_cast<int>(x_inc),
+      a_start, static_cast<int>(a_ld));
+  cudaDeviceSynchronize();  // the synchronization result itself is not inspected
+  return result;
+}
+
+// Forwards the cuBLAS calls for CHPR/ZHPR
+// float2 overload (real float alpha): rejects row-major layouts with
+// CUBLAS_STATUS_NOT_SUPPORTED, otherwise calls cublasChpr on the buffers advanced by their
+// offsets, synchronizes the device and returns the status reported by cuBLAS.
+cublasStatus_t cublasXhpr(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+ const size_t n,
+ const float alpha,
+ const float2* x_buffer, const size_t x_offset, const size_t x_inc,
+ float2* ap_buffer, const size_t ap_offset) {
+  if (layout == Layout::kRowMajor) {
+    return CUBLAS_STATUS_NOT_SUPPORTED;
+  }
+  const auto* x_start = reinterpret_cast<const cuComplex*>(x_buffer + x_offset);
+  auto* ap_start = reinterpret_cast<cuComplex*>(ap_buffer + ap_offset);
+  const auto result = cublasChpr(
+      handle, triangle,
+      static_cast<int>(n),
+      &alpha,
+      x_start, static_cast<int>(x_inc),
+      ap_start);
+  cudaDeviceSynchronize();  // the synchronization result itself is not inspected
+  return result;
+}
+// double2 overload (real double alpha): rejects row-major layouts with
+// CUBLAS_STATUS_NOT_SUPPORTED, otherwise calls cublasZhpr on the buffers advanced by their
+// offsets, synchronizes the device and returns the status reported by cuBLAS.
+cublasStatus_t cublasXhpr(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+ const size_t n,
+ const double alpha,
+ const double2* x_buffer, const size_t x_offset, const size_t x_inc,
+ double2* ap_buffer, const size_t ap_offset) {
+  if (layout == Layout::kRowMajor) {
+    return CUBLAS_STATUS_NOT_SUPPORTED;
+  }
+  const auto* x_start = reinterpret_cast<const cuDoubleComplex*>(x_buffer + x_offset);
+  auto* ap_start = reinterpret_cast<cuDoubleComplex*>(ap_buffer + ap_offset);
+  const auto result = cublasZhpr(
+      handle, triangle,
+      static_cast<int>(n),
+      &alpha,
+      x_start, static_cast<int>(x_inc),
+      ap_start);
+  cudaDeviceSynchronize();  // the synchronization result itself is not inspected
+  return result;
+}
+
+// Forwards the cuBLAS calls for CHER2/ZHER2
+// float2 overload: rejects row-major layouts with CUBLAS_STATUS_NOT_SUPPORTED, otherwise calls
+// cublasCher2 on the buffers advanced by their offsets, synchronizes the device and returns the
+// status reported by cuBLAS.
+cublasStatus_t cublasXher2(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+ const size_t n,
+ const float2 alpha,
+ const float2* x_buffer, const size_t x_offset, const size_t x_inc,
+ const float2* y_buffer, const size_t y_offset, const size_t y_inc,
+ float2* a_buffer, const size_t a_offset, const size_t a_ld) {
+  if (layout == Layout::kRowMajor) {
+    return CUBLAS_STATUS_NOT_SUPPORTED;
+  }
+  // Repack the scalar into the CUDA complex type (x = real part, y = imaginary part)
+  const cuComplex alpha_cuda{alpha.real(), alpha.imag()};
+  const auto* x_start = reinterpret_cast<const cuComplex*>(x_buffer + x_offset);
+  const auto* y_start = reinterpret_cast<const cuComplex*>(y_buffer + y_offset);
+  auto* a_start = reinterpret_cast<cuComplex*>(a_buffer + a_offset);
+  // a_ld is narrowed explicitly, consistent with the other size_t-to-int conversions below
+  const auto result = cublasCher2(
+      handle, triangle,
+      static_cast<int>(n),
+      &alpha_cuda,
+      x_start, static_cast<int>(x_inc),
+      y_start, static_cast<int>(y_inc),
+      a_start, static_cast<int>(a_ld));
+  cudaDeviceSynchronize();  // the synchronization result itself is not inspected
+  return result;
+}
+// double2 overload: rejects row-major layouts with CUBLAS_STATUS_NOT_SUPPORTED, otherwise calls
+// cublasZher2 on the buffers advanced by their offsets, synchronizes the device and returns the
+// status reported by cuBLAS.
+cublasStatus_t cublasXher2(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+ const size_t n,
+ const double2 alpha,
+ const double2* x_buffer, const size_t x_offset, const size_t x_inc,
+ const double2* y_buffer, const size_t y_offset, const size_t y_inc,
+ double2* a_buffer, const size_t a_offset, const size_t a_ld) {
+  if (layout == Layout::kRowMajor) {
+    return CUBLAS_STATUS_NOT_SUPPORTED;
+  }
+  // Repack the scalar into the CUDA complex type (x = real part, y = imaginary part)
+  const cuDoubleComplex alpha_cuda{alpha.real(), alpha.imag()};
+  const auto* x_start = reinterpret_cast<const cuDoubleComplex*>(x_buffer + x_offset);
+  const auto* y_start = reinterpret_cast<const cuDoubleComplex*>(y_buffer + y_offset);
+  auto* a_start = reinterpret_cast<cuDoubleComplex*>(a_buffer + a_offset);
+  // a_ld is narrowed explicitly, consistent with the other size_t-to-int conversions below
+  const auto result = cublasZher2(
+      handle, triangle,
+      static_cast<int>(n),
+      &alpha_cuda,
+      x_start, static_cast<int>(x_inc),
+      y_start, static_cast<int>(y_inc),
+      a_start, static_cast<int>(a_ld));
+  cudaDeviceSynchronize();  // the synchronization result itself is not inspected
+  return result;
+}
+
+// Forwards the cuBLAS calls for CHPR2/ZHPR2
+// float2 overload: rejects row-major layouts with CUBLAS_STATUS_NOT_SUPPORTED, otherwise calls
+// cublasChpr2 on the buffers advanced by their offsets, synchronizes the device and returns the
+// status reported by cuBLAS.
+cublasStatus_t cublasXhpr2(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+ const size_t n,
+ const float2 alpha,
+ const float2* x_buffer, const size_t x_offset, const size_t x_inc,
+ const float2* y_buffer, const size_t y_offset, const size_t y_inc,
+ float2* ap_buffer, const size_t ap_offset) {
+  if (layout == Layout::kRowMajor) {
+    return CUBLAS_STATUS_NOT_SUPPORTED;
+  }
+  // Repack the scalar into the CUDA complex type (x = real part, y = imaginary part)
+  const cuComplex alpha_cuda{alpha.real(), alpha.imag()};
+  const auto* x_start = reinterpret_cast<const cuComplex*>(x_buffer + x_offset);
+  const auto* y_start = reinterpret_cast<const cuComplex*>(y_buffer + y_offset);
+  auto* ap_start = reinterpret_cast<cuComplex*>(ap_buffer + ap_offset);
+  const auto result = cublasChpr2(
+      handle, triangle,
+      static_cast<int>(n),
+      &alpha_cuda,
+      x_start, static_cast<int>(x_inc),
+      y_start, static_cast<int>(y_inc),
+      ap_start);
+  cudaDeviceSynchronize();  // the synchronization result itself is not inspected
+  return result;
+}
+// double2 overload: rejects row-major layouts with CUBLAS_STATUS_NOT_SUPPORTED, otherwise calls
+// cublasZhpr2 on the buffers advanced by their offsets, synchronizes the device and returns the
+// status reported by cuBLAS.
+cublasStatus_t cublasXhpr2(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+ const size_t n,
+ const double2 alpha,
+ const double2* x_buffer, const size_t x_offset, const size_t x_inc,
+ const double2* y_buffer, const size_t y_offset, const size_t y_inc,
+ double2* ap_buffer, const size_t ap_offset) {
+  if (layout == Layout::kRowMajor) {
+    return CUBLAS_STATUS_NOT_SUPPORTED;
+  }
+  // Repack the scalar into the CUDA complex type (x = real part, y = imaginary part)
+  const cuDoubleComplex alpha_cuda{alpha.real(), alpha.imag()};
+  const auto* x_start = reinterpret_cast<const cuDoubleComplex*>(x_buffer + x_offset);
+  const auto* y_start = reinterpret_cast<const cuDoubleComplex*>(y_buffer + y_offset);
+  auto* ap_start = reinterpret_cast<cuDoubleComplex*>(ap_buffer + ap_offset);
+  const auto result = cublasZhpr2(
+      handle, triangle,
+      static_cast<int>(n),
+      &alpha_cuda,
+      x_start, static_cast<int>(x_inc),
+      y_start, static_cast<int>(y_inc),
+      ap_start);
+  cudaDeviceSynchronize();  // the synchronization result itself is not inspected
+  return result;
+}
+
+// Forwards the cuBLAS calls for SSYR/DSYR
+// float overload: rejects row-major layouts with CUBLAS_STATUS_NOT_SUPPORTED, otherwise calls
+// cublasSsyr on the buffers advanced by their offsets, synchronizes the device and returns the
+// status reported by cuBLAS.
+cublasStatus_t cublasXsyr(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+ const size_t n,
+ const float alpha,
+ const float* x_buffer, const size_t x_offset, const size_t x_inc,
+ float* a_buffer, const size_t a_offset, const size_t a_ld) {
+  if (layout == Layout::kRowMajor) {
+    return CUBLAS_STATUS_NOT_SUPPORTED;
+  }
+  const float* x_start = x_buffer + x_offset;
+  float* a_start = a_buffer + a_offset;
+  // a_ld is narrowed explicitly, consistent with the other size_t-to-int conversions below
+  const auto result = cublasSsyr(
+      handle, triangle,
+      static_cast<int>(n),
+      &alpha,
+      x_start, static_cast<int>(x_inc),
+      a_start, static_cast<int>(a_ld));
+  cudaDeviceSynchronize();  // the synchronization result itself is not inspected
+  return result;
+}
+// double overload: rejects row-major layouts with CUBLAS_STATUS_NOT_SUPPORTED, otherwise calls
+// cublasDsyr on the buffers advanced by their offsets, synchronizes the device and returns the
+// status reported by cuBLAS.
+cublasStatus_t cublasXsyr(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+ const size_t n,
+ const double alpha,
+ const double* x_buffer, const size_t x_offset, const size_t x_inc,
+ double* a_buffer, const size_t a_offset, const size_t a_ld) {
+  if (layout == Layout::kRowMajor) {
+    return CUBLAS_STATUS_NOT_SUPPORTED;
+  }
+  const double* x_start = x_buffer + x_offset;
+  double* a_start = a_buffer + a_offset;
+  // a_ld is narrowed explicitly, consistent with the other size_t-to-int conversions below
+  const auto result = cublasDsyr(
+      handle, triangle,
+      static_cast<int>(n),
+      &alpha,
+      x_start, static_cast<int>(x_inc),
+      a_start, static_cast<int>(a_ld));
+  cudaDeviceSynchronize();  // the synchronization result itself is not inspected
+  return result;
+}
+cublasStatus_t cublasXsyr(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+ const size_t n,
+ const half alpha,  // half overload: none of the arguments is read
+ const half* x_buffer, const size_t x_offset, const size_t x_inc,
+ half* a_buffer, const size_t a_offset, const size_t a_ld) {
+ return CUBLAS_STATUS_NOT_SUPPORTED;  // unconditional: no cuBLAS routine is invoked
+}
+
+// Forwards the cuBLAS calls for SSPR/DSPR
+// float overload: rejects row-major layouts with CUBLAS_STATUS_NOT_SUPPORTED, otherwise calls
+// cublasSspr on the buffers advanced by their offsets, synchronizes the device and returns the
+// status reported by cuBLAS.
+cublasStatus_t cublasXspr(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+ const size_t n,
+ const float alpha,
+ const float* x_buffer, const size_t x_offset, const size_t x_inc,
+ float* ap_buffer, const size_t ap_offset) {
+  if (layout == Layout::kRowMajor) {
+    return CUBLAS_STATUS_NOT_SUPPORTED;
+  }
+  const float* x_start = x_buffer + x_offset;
+  float* ap_start = ap_buffer + ap_offset;
+  const auto result = cublasSspr(
+      handle, triangle,
+      static_cast<int>(n),
+      &alpha,
+      x_start, static_cast<int>(x_inc),
+      ap_start);
+  cudaDeviceSynchronize();  // the synchronization result itself is not inspected
+  return result;
+}
+// double overload: rejects row-major layouts with CUBLAS_STATUS_NOT_SUPPORTED, otherwise calls
+// cublasDspr on the buffers advanced by their offsets, synchronizes the device and returns the
+// status reported by cuBLAS.
+cublasStatus_t cublasXspr(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+ const size_t n,
+ const double alpha,
+ const double* x_buffer, const size_t x_offset, const size_t x_inc,
+ double* ap_buffer, const size_t ap_offset) {
+  if (layout == Layout::kRowMajor) {
+    return CUBLAS_STATUS_NOT_SUPPORTED;
+  }
+  const double* x_start = x_buffer + x_offset;
+  double* ap_start = ap_buffer + ap_offset;
+  const auto result = cublasDspr(
+      handle, triangle,
+      static_cast<int>(n),
+      &alpha,
+      x_start, static_cast<int>(x_inc),
+      ap_start);
+  cudaDeviceSynchronize();  // the synchronization result itself is not inspected
+  return result;
+}
+cublasStatus_t cublasXspr(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+ const size_t n,
+ const half alpha,  // half overload: none of the arguments is read
+ const half* x_buffer, const size_t x_offset, const size_t x_inc,
+ half* ap_buffer, const size_t ap_offset) {
+ return CUBLAS_STATUS_NOT_SUPPORTED;  // unconditional: no cuBLAS routine is invoked
+}
+
+// Forwards the cuBLAS calls for SSYR2/DSYR2
+// float overload: rejects row-major layouts with CUBLAS_STATUS_NOT_SUPPORTED, otherwise calls
+// cublasSsyr2 on the buffers advanced by their offsets, synchronizes the device and returns the
+// status reported by cuBLAS.
+cublasStatus_t cublasXsyr2(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+ const size_t n,
+ const float alpha,
+ const float* x_buffer, const size_t x_offset, const size_t x_inc,
+ const float* y_buffer, const size_t y_offset, const size_t y_inc,
+ float* a_buffer, const size_t a_offset, const size_t a_ld) {
+  if (layout == Layout::kRowMajor) {
+    return CUBLAS_STATUS_NOT_SUPPORTED;
+  }
+  const float* x_start = x_buffer + x_offset;
+  const float* y_start = y_buffer + y_offset;
+  float* a_start = a_buffer + a_offset;
+  // a_ld is narrowed explicitly, consistent with the other size_t-to-int conversions below
+  const auto result = cublasSsyr2(
+      handle, triangle,
+      static_cast<int>(n),
+      &alpha,
+      x_start, static_cast<int>(x_inc),
+      y_start, static_cast<int>(y_inc),
+      a_start, static_cast<int>(a_ld));
+  cudaDeviceSynchronize();  // the synchronization result itself is not inspected
+  return result;
+}
+// double overload: rejects row-major layouts with CUBLAS_STATUS_NOT_SUPPORTED, otherwise calls
+// cublasDsyr2 on the buffers advanced by their offsets, synchronizes the device and returns the
+// status reported by cuBLAS.
+cublasStatus_t cublasXsyr2(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+ const size_t n,
+ const double alpha,
+ const double* x_buffer, const size_t x_offset, const size_t x_inc,
+ const double* y_buffer, const size_t y_offset, const size_t y_inc,
+ double* a_buffer, const size_t a_offset, const size_t a_ld) {
+  if (layout == Layout::kRowMajor) {
+    return CUBLAS_STATUS_NOT_SUPPORTED;
+  }
+  const double* x_start = x_buffer + x_offset;
+  const double* y_start = y_buffer + y_offset;
+  double* a_start = a_buffer + a_offset;
+  // a_ld is narrowed explicitly, consistent with the other size_t-to-int conversions below
+  const auto result = cublasDsyr2(
+      handle, triangle,
+      static_cast<int>(n),
+      &alpha,
+      x_start, static_cast<int>(x_inc),
+      y_start, static_cast<int>(y_inc),
+      a_start, static_cast<int>(a_ld));
+  cudaDeviceSynchronize();  // the synchronization result itself is not inspected
+  return result;
+}
+cublasStatus_t cublasXsyr2(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+ const size_t n,
+ const half alpha,  // half overload: none of the arguments is read
+ const half* x_buffer, const size_t x_offset, const size_t x_inc,
+ const half* y_buffer, const size_t y_offset, const size_t y_inc,
+ half* a_buffer, const size_t a_offset, const size_t a_ld) {
+ return CUBLAS_STATUS_NOT_SUPPORTED;  // unconditional: no cuBLAS routine is invoked
+}
+
+// Forwards the cuBLAS calls for SSPR2/DSPR2
+// float overload: rejects row-major layouts with CUBLAS_STATUS_NOT_SUPPORTED, otherwise calls
+// cublasSspr2 on the buffers advanced by their offsets, synchronizes the device and returns the
+// status reported by cuBLAS.
+cublasStatus_t cublasXspr2(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+ const size_t n,
+ const float alpha,
+ const float* x_buffer, const size_t x_offset, const size_t x_inc,
+ const float* y_buffer, const size_t y_offset, const size_t y_inc,
+ float* ap_buffer, const size_t ap_offset) {
+  if (layout == Layout::kRowMajor) {
+    return CUBLAS_STATUS_NOT_SUPPORTED;
+  }
+  const float* x_start = x_buffer + x_offset;
+  const float* y_start = y_buffer + y_offset;
+  float* ap_start = ap_buffer + ap_offset;
+  const auto result = cublasSspr2(
+      handle, triangle,
+      static_cast<int>(n),
+      &alpha,
+      x_start, static_cast<int>(x_inc),
+      y_start, static_cast<int>(y_inc),
+      ap_start);
+  cudaDeviceSynchronize();  // the synchronization result itself is not inspected
+  return result;
+}
+// double overload: rejects row-major layouts with CUBLAS_STATUS_NOT_SUPPORTED, otherwise calls
+// cublasDspr2 on the buffers advanced by their offsets, synchronizes the device and returns the
+// status reported by cuBLAS.
+cublasStatus_t cublasXspr2(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+ const size_t n,
+ const double alpha,
+ const double* x_buffer, const size_t x_offset, const size_t x_inc,
+ const double* y_buffer, const size_t y_offset, const size_t y_inc,
+ double* ap_buffer, const size_t ap_offset) {
+  if (layout == Layout::kRowMajor) {
+    return CUBLAS_STATUS_NOT_SUPPORTED;
+  }
+  const double* x_start = x_buffer + x_offset;
+  const double* y_start = y_buffer + y_offset;
+  double* ap_start = ap_buffer + ap_offset;
+  const auto result = cublasDspr2(
+      handle, triangle,
+      static_cast<int>(n),
+      &alpha,
+      x_start, static_cast<int>(x_inc),
+      y_start, static_cast<int>(y_inc),
+      ap_start);
+  cudaDeviceSynchronize();  // the synchronization result itself is not inspected
+  return result;
+}
+cublasStatus_t cublasXspr2(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+ const size_t n,
+ const half alpha,  // half overload: none of the arguments is read
+ const half* x_buffer, const size_t x_offset, const size_t x_inc,
+ const half* y_buffer, const size_t y_offset, const size_t y_inc,
+ half* ap_buffer, const size_t ap_offset) {
+ return CUBLAS_STATUS_NOT_SUPPORTED;  // unconditional: no cuBLAS routine is invoked
+}
+
+// =================================================================================================
+// BLAS level-3 (matrix-matrix) routines
+// =================================================================================================
+
+// Forwards the cuBLAS calls for SGEMM/DGEMM/CGEMM/ZGEMM
+// float overload: rejects row-major layouts with CUBLAS_STATUS_NOT_SUPPORTED, otherwise calls
+// cublasSgemm on the buffers advanced by their offsets, synchronizes the device and returns the
+// status reported by cuBLAS.
+cublasStatus_t cublasXgemm(cublasHandle_t handle, const Layout layout, const cublasOperation_t a_transpose, const cublasOperation_t b_transpose,
+ const size_t m, const size_t n, const size_t k,
+ const float alpha,
+ const float* a_buffer, const size_t a_offset, const size_t a_ld,
+ const float* b_buffer, const size_t b_offset, const size_t b_ld,
+ const float beta,
+ float* c_buffer, const size_t c_offset, const size_t c_ld) {
+  if (layout == Layout::kRowMajor) {
+    return CUBLAS_STATUS_NOT_SUPPORTED;
+  }
+  const float* a_start = a_buffer + a_offset;
+  const float* b_start = b_buffer + b_offset;
+  float* c_start = c_buffer + c_offset;
+  // Leading dimensions are narrowed explicitly, like the m/n/k sizes
+  const auto result = cublasSgemm(
+      handle, a_transpose, b_transpose,
+      static_cast<int>(m), static_cast<int>(n), static_cast<int>(k),
+      &alpha,
+      a_start, static_cast<int>(a_ld),
+      b_start, static_cast<int>(b_ld),
+      &beta,
+      c_start, static_cast<int>(c_ld));
+  cudaDeviceSynchronize();  // the synchronization result itself is not inspected
+  return result;
+}
+// double overload: rejects row-major layouts with CUBLAS_STATUS_NOT_SUPPORTED, otherwise calls
+// cublasDgemm on the buffers advanced by their offsets, synchronizes the device and returns the
+// status reported by cuBLAS.
+cublasStatus_t cublasXgemm(cublasHandle_t handle, const Layout layout, const cublasOperation_t a_transpose, const cublasOperation_t b_transpose,
+ const size_t m, const size_t n, const size_t k,
+ const double alpha,
+ const double* a_buffer, const size_t a_offset, const size_t a_ld,
+ const double* b_buffer, const size_t b_offset, const size_t b_ld,
+ const double beta,
+ double* c_buffer, const size_t c_offset, const size_t c_ld) {
+  if (layout == Layout::kRowMajor) {
+    return CUBLAS_STATUS_NOT_SUPPORTED;
+  }
+  const double* a_start = a_buffer + a_offset;
+  const double* b_start = b_buffer + b_offset;
+  double* c_start = c_buffer + c_offset;
+  // Leading dimensions are narrowed explicitly, like the m/n/k sizes
+  const auto result = cublasDgemm(
+      handle, a_transpose, b_transpose,
+      static_cast<int>(m), static_cast<int>(n), static_cast<int>(k),
+      &alpha,
+      a_start, static_cast<int>(a_ld),
+      b_start, static_cast<int>(b_ld),
+      &beta,
+      c_start, static_cast<int>(c_ld));
+  cudaDeviceSynchronize();  // the synchronization result itself is not inspected
+  return result;
+}
+// float2 overload: rejects row-major layouts with CUBLAS_STATUS_NOT_SUPPORTED, otherwise calls
+// cublasCgemm on the buffers advanced by their offsets, synchronizes the device and returns the
+// status reported by cuBLAS.
+cublasStatus_t cublasXgemm(cublasHandle_t handle, const Layout layout, const cublasOperation_t a_transpose, const cublasOperation_t b_transpose,
+ const size_t m, const size_t n, const size_t k,
+ const float2 alpha,
+ const float2* a_buffer, const size_t a_offset, const size_t a_ld,
+ const float2* b_buffer, const size_t b_offset, const size_t b_ld,
+ const float2 beta,
+ float2* c_buffer, const size_t c_offset, const size_t c_ld) {
+  if (layout == Layout::kRowMajor) {
+    return CUBLAS_STATUS_NOT_SUPPORTED;
+  }
+  // Repack both scalars into the CUDA complex type (x = real part, y = imaginary part)
+  const cuComplex alpha_cuda{alpha.real(), alpha.imag()};
+  const cuComplex beta_cuda{beta.real(), beta.imag()};
+  const auto* a_start = reinterpret_cast<const cuComplex*>(a_buffer + a_offset);
+  const auto* b_start = reinterpret_cast<const cuComplex*>(b_buffer + b_offset);
+  auto* c_start = reinterpret_cast<cuComplex*>(c_buffer + c_offset);
+  // Leading dimensions are narrowed explicitly, like the m/n/k sizes
+  const auto result = cublasCgemm(
+      handle, a_transpose, b_transpose,
+      static_cast<int>(m), static_cast<int>(n), static_cast<int>(k),
+      &alpha_cuda,
+      a_start, static_cast<int>(a_ld),
+      b_start, static_cast<int>(b_ld),
+      &beta_cuda,
+      c_start, static_cast<int>(c_ld));
+  cudaDeviceSynchronize();  // the synchronization result itself is not inspected
+  return result;
+}
+// double2 overload: rejects row-major layouts with CUBLAS_STATUS_NOT_SUPPORTED, otherwise calls
+// cublasZgemm on the buffers advanced by their offsets, synchronizes the device and returns the
+// status reported by cuBLAS.
+cublasStatus_t cublasXgemm(cublasHandle_t handle, const Layout layout, const cublasOperation_t a_transpose, const cublasOperation_t b_transpose,
+ const size_t m, const size_t n, const size_t k,
+ const double2 alpha,
+ const double2* a_buffer, const size_t a_offset, const size_t a_ld,
+ const double2* b_buffer, const size_t b_offset, const size_t b_ld,
+ const double2 beta,
+ double2* c_buffer, const size_t c_offset, const size_t c_ld) {
+  if (layout == Layout::kRowMajor) {
+    return CUBLAS_STATUS_NOT_SUPPORTED;
+  }
+  // Repack both scalars into the CUDA complex type (x = real part, y = imaginary part)
+  const cuDoubleComplex alpha_cuda{alpha.real(), alpha.imag()};
+  const cuDoubleComplex beta_cuda{beta.real(), beta.imag()};
+  const auto* a_start = reinterpret_cast<const cuDoubleComplex*>(a_buffer + a_offset);
+  const auto* b_start = reinterpret_cast<const cuDoubleComplex*>(b_buffer + b_offset);
+  auto* c_start = reinterpret_cast<cuDoubleComplex*>(c_buffer + c_offset);
+  // Leading dimensions are narrowed explicitly, like the m/n/k sizes
+  const auto result = cublasZgemm(
+      handle, a_transpose, b_transpose,
+      static_cast<int>(m), static_cast<int>(n), static_cast<int>(k),
+      &alpha_cuda,
+      a_start, static_cast<int>(a_ld),
+      b_start, static_cast<int>(b_ld),
+      &beta_cuda,
+      c_start, static_cast<int>(c_ld));
+  cudaDeviceSynchronize();  // the synchronization result itself is not inspected
+  return result;
+}
+cublasStatus_t cublasXgemm(cublasHandle_t handle, const Layout layout, const cublasOperation_t a_transpose, const cublasOperation_t b_transpose,
+ const size_t m, const size_t n, const size_t k,
+ const half alpha,  // half overload: none of the arguments is read
+ const half* a_buffer, const size_t a_offset, const size_t a_ld,
+ const half* b_buffer, const size_t b_offset, const size_t b_ld,
+ const half beta,
+ half* c_buffer, const size_t c_offset, const size_t c_ld) {
+ return CUBLAS_STATUS_NOT_SUPPORTED;  // unconditional: no cuBLAS routine is invoked
+}
+
+// Forwards the cuBLAS calls for SSYMM/DSYMM/CSYMM/ZSYMM
+// float overload: rejects row-major layouts with CUBLAS_STATUS_NOT_SUPPORTED, otherwise calls
+// cublasSsymm on the buffers advanced by their offsets, synchronizes the device and returns the
+// status reported by cuBLAS.
+cublasStatus_t cublasXsymm(cublasHandle_t handle, const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle,
+ const size_t m, const size_t n,
+ const float alpha,
+ const float* a_buffer, const size_t a_offset, const size_t a_ld,
+ const float* b_buffer, const size_t b_offset, const size_t b_ld,
+ const float beta,
+ float* c_buffer, const size_t c_offset, const size_t c_ld) {
+  if (layout == Layout::kRowMajor) {
+    return CUBLAS_STATUS_NOT_SUPPORTED;
+  }
+  const float* a_start = a_buffer + a_offset;
+  const float* b_start = b_buffer + b_offset;
+  float* c_start = c_buffer + c_offset;
+  // Leading dimensions are narrowed explicitly, like the m/n sizes
+  const auto result = cublasSsymm(
+      handle, side, triangle,
+      static_cast<int>(m), static_cast<int>(n),
+      &alpha,
+      a_start, static_cast<int>(a_ld),
+      b_start, static_cast<int>(b_ld),
+      &beta,
+      c_start, static_cast<int>(c_ld));
+  cudaDeviceSynchronize();  // the synchronization result itself is not inspected
+  return result;
+}
+// double overload: rejects row-major layouts with CUBLAS_STATUS_NOT_SUPPORTED, otherwise calls
+// cublasDsymm on the buffers advanced by their offsets, synchronizes the device and returns the
+// status reported by cuBLAS.
+cublasStatus_t cublasXsymm(cublasHandle_t handle, const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle,
+ const size_t m, const size_t n,
+ const double alpha,
+ const double* a_buffer, const size_t a_offset, const size_t a_ld,
+ const double* b_buffer, const size_t b_offset, const size_t b_ld,
+ const double beta,
+ double* c_buffer, const size_t c_offset, const size_t c_ld) {
+  if (layout == Layout::kRowMajor) {
+    return CUBLAS_STATUS_NOT_SUPPORTED;
+  }
+  const double* a_start = a_buffer + a_offset;
+  const double* b_start = b_buffer + b_offset;
+  double* c_start = c_buffer + c_offset;
+  // Leading dimensions are narrowed explicitly, like the m/n sizes
+  const auto result = cublasDsymm(
+      handle, side, triangle,
+      static_cast<int>(m), static_cast<int>(n),
+      &alpha,
+      a_start, static_cast<int>(a_ld),
+      b_start, static_cast<int>(b_ld),
+      &beta,
+      c_start, static_cast<int>(c_ld));
+  cudaDeviceSynchronize();  // the synchronization result itself is not inspected
+  return result;
+}
+// float2 overload: rejects row-major layouts with CUBLAS_STATUS_NOT_SUPPORTED, otherwise calls
+// cublasCsymm on the buffers advanced by their offsets, synchronizes the device and returns the
+// status reported by cuBLAS.
+cublasStatus_t cublasXsymm(cublasHandle_t handle, const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle,
+ const size_t m, const size_t n,
+ const float2 alpha,
+ const float2* a_buffer, const size_t a_offset, const size_t a_ld,
+ const float2* b_buffer, const size_t b_offset, const size_t b_ld,
+ const float2 beta,
+ float2* c_buffer, const size_t c_offset, const size_t c_ld) {
+  if (layout == Layout::kRowMajor) {
+    return CUBLAS_STATUS_NOT_SUPPORTED;
+  }
+  // Repack both scalars into the CUDA complex type (x = real part, y = imaginary part)
+  const cuComplex alpha_cuda{alpha.real(), alpha.imag()};
+  const cuComplex beta_cuda{beta.real(), beta.imag()};
+  const auto* a_start = reinterpret_cast<const cuComplex*>(a_buffer + a_offset);
+  const auto* b_start = reinterpret_cast<const cuComplex*>(b_buffer + b_offset);
+  auto* c_start = reinterpret_cast<cuComplex*>(c_buffer + c_offset);
+  // Leading dimensions are narrowed explicitly, like the m/n sizes
+  const auto result = cublasCsymm(
+      handle, side, triangle,
+      static_cast<int>(m), static_cast<int>(n),
+      &alpha_cuda,
+      a_start, static_cast<int>(a_ld),
+      b_start, static_cast<int>(b_ld),
+      &beta_cuda,
+      c_start, static_cast<int>(c_ld));
+  cudaDeviceSynchronize();  // the synchronization result itself is not inspected
+  return result;
+}
+// double2 overload: rejects row-major layouts with CUBLAS_STATUS_NOT_SUPPORTED, otherwise calls
+// cublasZsymm on the buffers advanced by their offsets, synchronizes the device and returns the
+// status reported by cuBLAS.
+cublasStatus_t cublasXsymm(cublasHandle_t handle, const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle,
+ const size_t m, const size_t n,
+ const double2 alpha,
+ const double2* a_buffer, const size_t a_offset, const size_t a_ld,
+ const double2* b_buffer, const size_t b_offset, const size_t b_ld,
+ const double2 beta,
+ double2* c_buffer, const size_t c_offset, const size_t c_ld) {
+  if (layout == Layout::kRowMajor) {
+    return CUBLAS_STATUS_NOT_SUPPORTED;
+  }
+  // Repack both scalars into the CUDA complex type (x = real part, y = imaginary part)
+  const cuDoubleComplex alpha_cuda{alpha.real(), alpha.imag()};
+  const cuDoubleComplex beta_cuda{beta.real(), beta.imag()};
+  const auto* a_start = reinterpret_cast<const cuDoubleComplex*>(a_buffer + a_offset);
+  const auto* b_start = reinterpret_cast<const cuDoubleComplex*>(b_buffer + b_offset);
+  auto* c_start = reinterpret_cast<cuDoubleComplex*>(c_buffer + c_offset);
+  // Leading dimensions are narrowed explicitly, like the m/n sizes
+  const auto result = cublasZsymm(
+      handle, side, triangle,
+      static_cast<int>(m), static_cast<int>(n),
+      &alpha_cuda,
+      a_start, static_cast<int>(a_ld),
+      b_start, static_cast<int>(b_ld),
+      &beta_cuda,
+      c_start, static_cast<int>(c_ld));
+  cudaDeviceSynchronize();  // the synchronization result itself is not inspected
+  return result;
+}
+cublasStatus_t cublasXsymm(cublasHandle_t handle, const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle,
+ const size_t m, const size_t n,
+ const half alpha,  // half overload: none of the arguments is read
+ const half* a_buffer, const size_t a_offset, const size_t a_ld,
+ const half* b_buffer, const size_t b_offset, const size_t b_ld,
+ const half beta,
+ half* c_buffer, const size_t c_offset, const size_t c_ld) {
+ return CUBLAS_STATUS_NOT_SUPPORTED;  // unconditional: no cuBLAS routine is invoked
+}
+
+// Forwards the cuBLAS calls for CHEMM/ZHEMM
+// float2 overload: rejects row-major layouts with CUBLAS_STATUS_NOT_SUPPORTED, otherwise calls
+// cublasChemm on the buffers advanced by their offsets, synchronizes the device and returns the
+// status reported by cuBLAS.
+cublasStatus_t cublasXhemm(cublasHandle_t handle, const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle,
+ const size_t m, const size_t n,
+ const float2 alpha,
+ const float2* a_buffer, const size_t a_offset, const size_t a_ld,
+ const float2* b_buffer, const size_t b_offset, const size_t b_ld,
+ const float2 beta,
+ float2* c_buffer, const size_t c_offset, const size_t c_ld) {
+  if (layout == Layout::kRowMajor) {
+    return CUBLAS_STATUS_NOT_SUPPORTED;
+  }
+  // Repack both scalars into the CUDA complex type (x = real part, y = imaginary part)
+  const cuComplex alpha_cuda{alpha.real(), alpha.imag()};
+  const cuComplex beta_cuda{beta.real(), beta.imag()};
+  const auto* a_start = reinterpret_cast<const cuComplex*>(a_buffer + a_offset);
+  const auto* b_start = reinterpret_cast<const cuComplex*>(b_buffer + b_offset);
+  auto* c_start = reinterpret_cast<cuComplex*>(c_buffer + c_offset);
+  // Leading dimensions are narrowed explicitly, like the m/n sizes
+  const auto result = cublasChemm(
+      handle, side, triangle,
+      static_cast<int>(m), static_cast<int>(n),
+      &alpha_cuda,
+      a_start, static_cast<int>(a_ld),
+      b_start, static_cast<int>(b_ld),
+      &beta_cuda,
+      c_start, static_cast<int>(c_ld));
+  cudaDeviceSynchronize();  // the synchronization result itself is not inspected
+  return result;
+}
+// double2 overload: rejects row-major layouts with CUBLAS_STATUS_NOT_SUPPORTED, otherwise calls
+// cublasZhemm on the buffers advanced by their offsets, synchronizes the device and returns the
+// status reported by cuBLAS.
+cublasStatus_t cublasXhemm(cublasHandle_t handle, const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle,
+ const size_t m, const size_t n,
+ const double2 alpha,
+ const double2* a_buffer, const size_t a_offset, const size_t a_ld,
+ const double2* b_buffer, const size_t b_offset, const size_t b_ld,
+ const double2 beta,
+ double2* c_buffer, const size_t c_offset, const size_t c_ld) {
+  if (layout == Layout::kRowMajor) {
+    return CUBLAS_STATUS_NOT_SUPPORTED;
+  }
+  // Repack both scalars into the CUDA complex type (x = real part, y = imaginary part)
+  const cuDoubleComplex alpha_cuda{alpha.real(), alpha.imag()};
+  const cuDoubleComplex beta_cuda{beta.real(), beta.imag()};
+  const auto* a_start = reinterpret_cast<const cuDoubleComplex*>(a_buffer + a_offset);
+  const auto* b_start = reinterpret_cast<const cuDoubleComplex*>(b_buffer + b_offset);
+  auto* c_start = reinterpret_cast<cuDoubleComplex*>(c_buffer + c_offset);
+  // Leading dimensions are narrowed explicitly, like the m/n sizes
+  const auto result = cublasZhemm(
+      handle, side, triangle,
+      static_cast<int>(m), static_cast<int>(n),
+      &alpha_cuda,
+      a_start, static_cast<int>(a_ld),
+      b_start, static_cast<int>(b_ld),
+      &beta_cuda,
+      c_start, static_cast<int>(c_ld));
+  cudaDeviceSynchronize();  // the synchronization result itself is not inspected
+  return result;
+}
+
+// Forwards the cuBLAS calls for SSYRK/DSYRK/CSYRK/ZSYRK
+// float overload: rejects row-major layouts with CUBLAS_STATUS_NOT_SUPPORTED, otherwise calls
+// cublasSsyrk on the buffers advanced by their offsets, synchronizes the device and returns the
+// status reported by cuBLAS.
+cublasStatus_t cublasXsyrk(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose,
+ const size_t n, const size_t k,
+ const float alpha,
+ const float* a_buffer, const size_t a_offset, const size_t a_ld,
+ const float beta,
+ float* c_buffer, const size_t c_offset, const size_t c_ld) {
+  if (layout == Layout::kRowMajor) {
+    return CUBLAS_STATUS_NOT_SUPPORTED;
+  }
+  const float* a_start = a_buffer + a_offset;
+  float* c_start = c_buffer + c_offset;
+  // Leading dimensions are narrowed explicitly, like the n/k sizes
+  const auto result = cublasSsyrk(
+      handle, triangle, a_transpose,
+      static_cast<int>(n), static_cast<int>(k),
+      &alpha,
+      a_start, static_cast<int>(a_ld),
+      &beta,
+      c_start, static_cast<int>(c_ld));
+  cudaDeviceSynchronize();  // the synchronization result itself is not inspected
+  return result;
+}
+cublasStatus_t cublasXsyrk(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose,
+                           const size_t n, const size_t k,
+                           const double alpha,
+                           const double* a_buffer, const size_t a_offset, const size_t a_ld,
+                           const double beta,
+                           double* c_buffer, const size_t c_offset, const size_t c_ld) {
+  // Double-precision SYRK forwarded to cublasDsyrk; row-major layouts are rejected
+  if (layout == Layout::kRowMajor) {
+    return CUBLAS_STATUS_NOT_SUPPORTED;
+  }
+  const double* a_ptr = a_buffer + a_offset;
+  double* c_ptr = c_buffer + c_offset;
+  const auto result = cublasDsyrk(handle, triangle, a_transpose,
+                                  static_cast<int>(n), static_cast<int>(k),
+                                  &alpha, a_ptr, a_ld, &beta, c_ptr, c_ld);
+  cudaDeviceSynchronize();
+  return result;
+}
+cublasStatus_t cublasXsyrk(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose,
+                           const size_t n, const size_t k,
+                           const float2 alpha,
+                           const float2* a_buffer, const size_t a_offset, const size_t a_ld,
+                           const float2 beta,
+                           float2* c_buffer, const size_t c_offset, const size_t c_ld) {
+  // Single-precision complex SYRK forwarded to cublasCsyrk; row-major layouts are rejected
+  if (layout == Layout::kRowMajor) {
+    return CUBLAS_STATUS_NOT_SUPPORTED;
+  }
+  // Repack the complex scalars into the struct type that cuBLAS takes by pointer
+  const cuComplex cu_alpha{alpha.real(), alpha.imag()};
+  const cuComplex cu_beta{beta.real(), beta.imag()};
+  const auto* a_ptr = reinterpret_cast<const cuComplex*>(a_buffer + a_offset);
+  auto* c_ptr = reinterpret_cast<cuComplex*>(c_buffer + c_offset);
+  const auto result = cublasCsyrk(handle, triangle, a_transpose,
+                                  static_cast<int>(n), static_cast<int>(k),
+                                  &cu_alpha, a_ptr, a_ld, &cu_beta, c_ptr, c_ld);
+  cudaDeviceSynchronize();
+  return result;
+}
+cublasStatus_t cublasXsyrk(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose,
+                           const size_t n, const size_t k,
+                           const double2 alpha,
+                           const double2* a_buffer, const size_t a_offset, const size_t a_ld,
+                           const double2 beta,
+                           double2* c_buffer, const size_t c_offset, const size_t c_ld) {
+  // Double-precision complex SYRK forwarded to cublasZsyrk; row-major layouts are rejected
+  if (layout == Layout::kRowMajor) {
+    return CUBLAS_STATUS_NOT_SUPPORTED;
+  }
+  // Repack the complex scalars into the struct type that cuBLAS takes by pointer
+  const cuDoubleComplex cu_alpha{alpha.real(), alpha.imag()};
+  const cuDoubleComplex cu_beta{beta.real(), beta.imag()};
+  const auto* a_ptr = reinterpret_cast<const cuDoubleComplex*>(a_buffer + a_offset);
+  auto* c_ptr = reinterpret_cast<cuDoubleComplex*>(c_buffer + c_offset);
+  const auto result = cublasZsyrk(handle, triangle, a_transpose,
+                                  static_cast<int>(n), static_cast<int>(k),
+                                  &cu_alpha, a_ptr, a_ld, &cu_beta, c_ptr, c_ld);
+  cudaDeviceSynchronize();
+  return result;
+}
+cublasStatus_t cublasXsyrk(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose,
+ const size_t n, const size_t k,
+ const half alpha,
+ const half* a_buffer, const size_t a_offset, const size_t a_ld,
+ const half beta,
+ half* c_buffer, const size_t c_offset, const size_t c_ld) {
+ // Half-precision overload: no cuBLAS call is made and every argument is ignored; the
+ // function unconditionally reports that the operation is not supported.
+ return CUBLAS_STATUS_NOT_SUPPORTED;
+}
+
+// Forwards the cuBLAS calls for CHERK/ZHERK
+cublasStatus_t cublasXherk(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose,
+                           const size_t n, const size_t k,
+                           const float alpha,
+                           const float2* a_buffer, const size_t a_offset, const size_t a_ld,
+                           const float beta,
+                           float2* c_buffer, const size_t c_offset, const size_t c_ld) {
+  // Single-precision complex HERK forwarded to cublasCherk; row-major layouts are rejected.
+  // Both scalars are real-valued here, so they are handed over without repacking.
+  if (layout == Layout::kRowMajor) {
+    return CUBLAS_STATUS_NOT_SUPPORTED;
+  }
+  const auto* a_ptr = reinterpret_cast<const cuComplex*>(a_buffer + a_offset);
+  auto* c_ptr = reinterpret_cast<cuComplex*>(c_buffer + c_offset);
+  const auto result = cublasCherk(handle, triangle, a_transpose,
+                                  static_cast<int>(n), static_cast<int>(k),
+                                  &alpha, a_ptr, a_ld, &beta, c_ptr, c_ld);
+  cudaDeviceSynchronize();
+  return result;
+}
+cublasStatus_t cublasXherk(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose,
+                           const size_t n, const size_t k,
+                           const double alpha,
+                           const double2* a_buffer, const size_t a_offset, const size_t a_ld,
+                           const double beta,
+                           double2* c_buffer, const size_t c_offset, const size_t c_ld) {
+  // Double-precision complex HERK forwarded to cublasZherk; row-major layouts are rejected.
+  // Both scalars are real-valued here, so they are handed over without repacking.
+  if (layout == Layout::kRowMajor) {
+    return CUBLAS_STATUS_NOT_SUPPORTED;
+  }
+  const auto* a_ptr = reinterpret_cast<const cuDoubleComplex*>(a_buffer + a_offset);
+  auto* c_ptr = reinterpret_cast<cuDoubleComplex*>(c_buffer + c_offset);
+  const auto result = cublasZherk(handle, triangle, a_transpose,
+                                  static_cast<int>(n), static_cast<int>(k),
+                                  &alpha, a_ptr, a_ld, &beta, c_ptr, c_ld);
+  cudaDeviceSynchronize();
+  return result;
+}
+
+// Forwards the cuBLAS calls for SSYR2K/DSYR2K/CSYR2K/ZSYR2K
+cublasStatus_t cublasXsyr2k(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t ab_transpose,
+                            const size_t n, const size_t k,
+                            const float alpha,
+                            const float* a_buffer, const size_t a_offset, const size_t a_ld,
+                            const float* b_buffer, const size_t b_offset, const size_t b_ld,
+                            const float beta,
+                            float* c_buffer, const size_t c_offset, const size_t c_ld) {
+  // Single-precision SYR2K forwarded to cublasSsyr2k; row-major layouts are rejected
+  if (layout == Layout::kRowMajor) {
+    return CUBLAS_STATUS_NOT_SUPPORTED;
+  }
+  const float* a_ptr = a_buffer + a_offset;
+  const float* b_ptr = b_buffer + b_offset;
+  float* c_ptr = c_buffer + c_offset;
+  const auto result = cublasSsyr2k(handle, triangle, ab_transpose,
+                                   static_cast<int>(n), static_cast<int>(k),
+                                   &alpha, a_ptr, a_ld, b_ptr, b_ld, &beta, c_ptr, c_ld);
+  cudaDeviceSynchronize();
+  return result;
+}
+cublasStatus_t cublasXsyr2k(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t ab_transpose,
+                            const size_t n, const size_t k,
+                            const double alpha,
+                            const double* a_buffer, const size_t a_offset, const size_t a_ld,
+                            const double* b_buffer, const size_t b_offset, const size_t b_ld,
+                            const double beta,
+                            double* c_buffer, const size_t c_offset, const size_t c_ld) {
+  // Double-precision SYR2K forwarded to cublasDsyr2k; row-major layouts are rejected
+  if (layout == Layout::kRowMajor) {
+    return CUBLAS_STATUS_NOT_SUPPORTED;
+  }
+  const double* a_ptr = a_buffer + a_offset;
+  const double* b_ptr = b_buffer + b_offset;
+  double* c_ptr = c_buffer + c_offset;
+  const auto result = cublasDsyr2k(handle, triangle, ab_transpose,
+                                   static_cast<int>(n), static_cast<int>(k),
+                                   &alpha, a_ptr, a_ld, b_ptr, b_ld, &beta, c_ptr, c_ld);
+  cudaDeviceSynchronize();
+  return result;
+}
+cublasStatus_t cublasXsyr2k(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t ab_transpose,
+                            const size_t n, const size_t k,
+                            const float2 alpha,
+                            const float2* a_buffer, const size_t a_offset, const size_t a_ld,
+                            const float2* b_buffer, const size_t b_offset, const size_t b_ld,
+                            const float2 beta,
+                            float2* c_buffer, const size_t c_offset, const size_t c_ld) {
+  // Single-precision complex SYR2K forwarded to cublasCsyr2k; row-major layouts are rejected
+  if (layout == Layout::kRowMajor) {
+    return CUBLAS_STATUS_NOT_SUPPORTED;
+  }
+  // Repack the complex scalars into the struct type that cuBLAS takes by pointer
+  const cuComplex cu_alpha{alpha.real(), alpha.imag()};
+  const cuComplex cu_beta{beta.real(), beta.imag()};
+  const auto* a_ptr = reinterpret_cast<const cuComplex*>(a_buffer + a_offset);
+  const auto* b_ptr = reinterpret_cast<const cuComplex*>(b_buffer + b_offset);
+  auto* c_ptr = reinterpret_cast<cuComplex*>(c_buffer + c_offset);
+  const auto result = cublasCsyr2k(handle, triangle, ab_transpose,
+                                   static_cast<int>(n), static_cast<int>(k),
+                                   &cu_alpha, a_ptr, a_ld, b_ptr, b_ld, &cu_beta, c_ptr, c_ld);
+  cudaDeviceSynchronize();
+  return result;
+}
+cublasStatus_t cublasXsyr2k(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t ab_transpose,
+                            const size_t n, const size_t k,
+                            const double2 alpha,
+                            const double2* a_buffer, const size_t a_offset, const size_t a_ld,
+                            const double2* b_buffer, const size_t b_offset, const size_t b_ld,
+                            const double2 beta,
+                            double2* c_buffer, const size_t c_offset, const size_t c_ld) {
+  // Double-precision complex SYR2K forwarded to cublasZsyr2k; row-major layouts are rejected
+  if (layout == Layout::kRowMajor) {
+    return CUBLAS_STATUS_NOT_SUPPORTED;
+  }
+  // Repack the complex scalars into the struct type that cuBLAS takes by pointer
+  const cuDoubleComplex cu_alpha{alpha.real(), alpha.imag()};
+  const cuDoubleComplex cu_beta{beta.real(), beta.imag()};
+  const auto* a_ptr = reinterpret_cast<const cuDoubleComplex*>(a_buffer + a_offset);
+  const auto* b_ptr = reinterpret_cast<const cuDoubleComplex*>(b_buffer + b_offset);
+  auto* c_ptr = reinterpret_cast<cuDoubleComplex*>(c_buffer + c_offset);
+  const auto result = cublasZsyr2k(handle, triangle, ab_transpose,
+                                   static_cast<int>(n), static_cast<int>(k),
+                                   &cu_alpha, a_ptr, a_ld, b_ptr, b_ld, &cu_beta, c_ptr, c_ld);
+  cudaDeviceSynchronize();
+  return result;
+}
+cublasStatus_t cublasXsyr2k(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t ab_transpose,
+ const size_t n, const size_t k,
+ const half alpha,
+ const half* a_buffer, const size_t a_offset, const size_t a_ld,
+ const half* b_buffer, const size_t b_offset, const size_t b_ld,
+ const half beta,
+ half* c_buffer, const size_t c_offset, const size_t c_ld) {
+ // Half-precision overload: no cuBLAS call is made and every argument is ignored; the
+ // function unconditionally reports that the operation is not supported.
+ return CUBLAS_STATUS_NOT_SUPPORTED;
+}
+
+// Forwards the cuBLAS calls for CHER2K/ZHER2K
+cublasStatus_t cublasXher2k(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t ab_transpose,
+                            const size_t n, const size_t k,
+                            const float2 alpha,
+                            const float2* a_buffer, const size_t a_offset, const size_t a_ld,
+                            const float2* b_buffer, const size_t b_offset, const size_t b_ld,
+                            const float beta,
+                            float2* c_buffer, const size_t c_offset, const size_t c_ld) {
+  // Single-precision complex HER2K forwarded to cublasCher2k; row-major layouts are rejected
+  if (layout == Layout::kRowMajor) {
+    return CUBLAS_STATUS_NOT_SUPPORTED;
+  }
+  // Only alpha is complex and needs repacking; beta is real and is passed through as-is
+  const cuComplex cu_alpha{alpha.real(), alpha.imag()};
+  const auto* a_ptr = reinterpret_cast<const cuComplex*>(a_buffer + a_offset);
+  const auto* b_ptr = reinterpret_cast<const cuComplex*>(b_buffer + b_offset);
+  auto* c_ptr = reinterpret_cast<cuComplex*>(c_buffer + c_offset);
+  const auto result = cublasCher2k(handle, triangle, ab_transpose,
+                                   static_cast<int>(n), static_cast<int>(k),
+                                   &cu_alpha, a_ptr, a_ld, b_ptr, b_ld, &beta, c_ptr, c_ld);
+  cudaDeviceSynchronize();
+  return result;
+}
+cublasStatus_t cublasXher2k(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t ab_transpose,
+                            const size_t n, const size_t k,
+                            const double2 alpha,
+                            const double2* a_buffer, const size_t a_offset, const size_t a_ld,
+                            const double2* b_buffer, const size_t b_offset, const size_t b_ld,
+                            const double beta,
+                            double2* c_buffer, const size_t c_offset, const size_t c_ld) {
+  // Double-precision complex HER2K forwarded to cublasZher2k; row-major layouts are rejected
+  if (layout == Layout::kRowMajor) {
+    return CUBLAS_STATUS_NOT_SUPPORTED;
+  }
+  // Only alpha is complex and needs repacking; beta is real and is passed through as-is
+  const cuDoubleComplex cu_alpha{alpha.real(), alpha.imag()};
+  const auto* a_ptr = reinterpret_cast<const cuDoubleComplex*>(a_buffer + a_offset);
+  const auto* b_ptr = reinterpret_cast<const cuDoubleComplex*>(b_buffer + b_offset);
+  auto* c_ptr = reinterpret_cast<cuDoubleComplex*>(c_buffer + c_offset);
+  const auto result = cublasZher2k(handle, triangle, ab_transpose,
+                                   static_cast<int>(n), static_cast<int>(k),
+                                   &cu_alpha, a_ptr, a_ld, b_ptr, b_ld, &beta, c_ptr, c_ld);
+  cudaDeviceSynchronize();
+  return result;
+}
+
+// Forwards the cuBLAS calls for STRMM/DTRMM/CTRMM/ZTRMM
+cublasStatus_t cublasXtrmm(cublasHandle_t handle, const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+                           const size_t m, const size_t n,
+                           const float alpha,
+                           const float* a_buffer, const size_t a_offset, const size_t a_ld,
+                           float* b_buffer, const size_t b_offset, const size_t b_ld) {
+  // Single-precision TRMM forwarded to cublasStrmm; row-major layouts are rejected.
+  // The cuBLAS v2 routine is out-of-place (C = alpha * op(A) * B) but permits C to alias B.
+  // BLAS TRMM overwrites B, so b_buffer is supplied as both the B input and the C output.
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  auto status = cublasStrmm(handle, side, triangle, a_transpose, diagonal,
+                            static_cast<int>(m), static_cast<int>(n),
+                            &alpha,
+                            &a_buffer[a_offset], static_cast<int>(a_ld),
+                            &b_buffer[b_offset], static_cast<int>(b_ld),
+                            &b_buffer[b_offset], static_cast<int>(b_ld));
+  cudaDeviceSynchronize();
+  return status;
+}
+cublasStatus_t cublasXtrmm(cublasHandle_t handle, const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+                           const size_t m, const size_t n,
+                           const double alpha,
+                           const double* a_buffer, const size_t a_offset, const size_t a_ld,
+                           double* b_buffer, const size_t b_offset, const size_t b_ld) {
+  // Double-precision TRMM forwarded to cublasDtrmm; row-major layouts are rejected.
+  // The cuBLAS v2 routine is out-of-place (C = alpha * op(A) * B) but permits C to alias B.
+  // BLAS TRMM overwrites B, so b_buffer is supplied as both the B input and the C output.
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  auto status = cublasDtrmm(handle, side, triangle, a_transpose, diagonal,
+                            static_cast<int>(m), static_cast<int>(n),
+                            &alpha,
+                            &a_buffer[a_offset], static_cast<int>(a_ld),
+                            &b_buffer[b_offset], static_cast<int>(b_ld),
+                            &b_buffer[b_offset], static_cast<int>(b_ld));
+  cudaDeviceSynchronize();
+  return status;
+}
+cublasStatus_t cublasXtrmm(cublasHandle_t handle, const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+                           const size_t m, const size_t n,
+                           const float2 alpha,
+                           const float2* a_buffer, const size_t a_offset, const size_t a_ld,
+                           float2* b_buffer, const size_t b_offset, const size_t b_ld) {
+  // Single-precision complex TRMM forwarded to cublasCtrmm; row-major layouts are rejected.
+  // The cuBLAS v2 routine is out-of-place (C = alpha * op(A) * B) but permits C to alias B.
+  // BLAS TRMM overwrites B, so b_buffer is supplied as both the B input and the C output.
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  // Repack the complex scalar into the struct type that cuBLAS takes by pointer
+  cuComplex alpha_cuda;
+  alpha_cuda.x = alpha.real();
+  alpha_cuda.y = alpha.imag();
+  auto status = cublasCtrmm(handle, side, triangle, a_transpose, diagonal,
+                            static_cast<int>(m), static_cast<int>(n),
+                            &alpha_cuda,
+                            reinterpret_cast<const cuComplex*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+                            reinterpret_cast<const cuComplex*>(&b_buffer[b_offset]), static_cast<int>(b_ld),
+                            reinterpret_cast<cuComplex*>(&b_buffer[b_offset]), static_cast<int>(b_ld));
+  cudaDeviceSynchronize();
+  return status;
+}
+cublasStatus_t cublasXtrmm(cublasHandle_t handle, const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+                           const size_t m, const size_t n,
+                           const double2 alpha,
+                           const double2* a_buffer, const size_t a_offset, const size_t a_ld,
+                           double2* b_buffer, const size_t b_offset, const size_t b_ld) {
+  // Double-precision complex TRMM forwarded to cublasZtrmm; row-major layouts are rejected.
+  // The cuBLAS v2 routine is out-of-place (C = alpha * op(A) * B) but permits C to alias B.
+  // BLAS TRMM overwrites B, so b_buffer is supplied as both the B input and the C output.
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  // Repack the complex scalar into the struct type that cuBLAS takes by pointer
+  cuDoubleComplex alpha_cuda;
+  alpha_cuda.x = alpha.real();
+  alpha_cuda.y = alpha.imag();
+  auto status = cublasZtrmm(handle, side, triangle, a_transpose, diagonal,
+                            static_cast<int>(m), static_cast<int>(n),
+                            &alpha_cuda,
+                            reinterpret_cast<const cuDoubleComplex*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+                            reinterpret_cast<const cuDoubleComplex*>(&b_buffer[b_offset]), static_cast<int>(b_ld),
+                            reinterpret_cast<cuDoubleComplex*>(&b_buffer[b_offset]), static_cast<int>(b_ld));
+  cudaDeviceSynchronize();
+  return status;
+}
+cublasStatus_t cublasXtrmm(cublasHandle_t handle, const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+ const size_t m, const size_t n,
+ const half alpha,
+ const half* a_buffer, const size_t a_offset, const size_t a_ld,
+ half* b_buffer, const size_t b_offset, const size_t b_ld) {
+ // Half-precision overload: no cuBLAS call is made and every argument is ignored; the
+ // function unconditionally reports that the operation is not supported.
+ return CUBLAS_STATUS_NOT_SUPPORTED;
+}
+
+// Forwards the cuBLAS calls for STRSM/DTRSM/CTRSM/ZTRSM
+cublasStatus_t cublasXtrsm(cublasHandle_t handle, const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+                           const size_t m, const size_t n,
+                           const float alpha,
+                           const float* a_buffer, const size_t a_offset, const size_t a_ld,
+                           float* b_buffer, const size_t b_offset, const size_t b_ld) {
+  // Single-precision TRSM forwarded to cublasStrsm; row-major layouts are rejected
+  if (layout == Layout::kRowMajor) {
+    return CUBLAS_STATUS_NOT_SUPPORTED;
+  }
+  const float* a_ptr = a_buffer + a_offset;
+  float* b_ptr = b_buffer + b_offset;
+  const auto result = cublasStrsm(handle, side, triangle, a_transpose, diagonal,
+                                  static_cast<int>(m), static_cast<int>(n),
+                                  &alpha, a_ptr, a_ld, b_ptr, b_ld);
+  cudaDeviceSynchronize();
+  return result;
+}
+cublasStatus_t cublasXtrsm(cublasHandle_t handle, const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+                           const size_t m, const size_t n,
+                           const double alpha,
+                           const double* a_buffer, const size_t a_offset, const size_t a_ld,
+                           double* b_buffer, const size_t b_offset, const size_t b_ld) {
+  // Double-precision TRSM forwarded to cublasDtrsm; row-major layouts are rejected
+  if (layout == Layout::kRowMajor) {
+    return CUBLAS_STATUS_NOT_SUPPORTED;
+  }
+  const double* a_ptr = a_buffer + a_offset;
+  double* b_ptr = b_buffer + b_offset;
+  const auto result = cublasDtrsm(handle, side, triangle, a_transpose, diagonal,
+                                  static_cast<int>(m), static_cast<int>(n),
+                                  &alpha, a_ptr, a_ld, b_ptr, b_ld);
+  cudaDeviceSynchronize();
+  return result;
+}
+cublasStatus_t cublasXtrsm(cublasHandle_t handle, const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+                           const size_t m, const size_t n,
+                           const float2 alpha,
+                           const float2* a_buffer, const size_t a_offset, const size_t a_ld,
+                           float2* b_buffer, const size_t b_offset, const size_t b_ld) {
+  // Single-precision complex TRSM forwarded to cublasCtrsm; row-major layouts are rejected
+  if (layout == Layout::kRowMajor) {
+    return CUBLAS_STATUS_NOT_SUPPORTED;
+  }
+  // Repack the complex scalar into the struct type that cuBLAS takes by pointer
+  const cuComplex cu_alpha{alpha.real(), alpha.imag()};
+  const auto* a_ptr = reinterpret_cast<const cuComplex*>(a_buffer + a_offset);
+  auto* b_ptr = reinterpret_cast<cuComplex*>(b_buffer + b_offset);
+  const auto result = cublasCtrsm(handle, side, triangle, a_transpose, diagonal,
+                                  static_cast<int>(m), static_cast<int>(n),
+                                  &cu_alpha, a_ptr, a_ld, b_ptr, b_ld);
+  cudaDeviceSynchronize();
+  return result;
+}
+cublasStatus_t cublasXtrsm(cublasHandle_t handle, const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+                           const size_t m, const size_t n,
+                           const double2 alpha,
+                           const double2* a_buffer, const size_t a_offset, const size_t a_ld,
+                           double2* b_buffer, const size_t b_offset, const size_t b_ld) {
+  // Double-precision complex TRSM forwarded to cublasZtrsm; row-major layouts are rejected
+  if (layout == Layout::kRowMajor) {
+    return CUBLAS_STATUS_NOT_SUPPORTED;
+  }
+  // Repack the complex scalar into the struct type that cuBLAS takes by pointer
+  const cuDoubleComplex cu_alpha{alpha.real(), alpha.imag()};
+  const auto* a_ptr = reinterpret_cast<const cuDoubleComplex*>(a_buffer + a_offset);
+  auto* b_ptr = reinterpret_cast<cuDoubleComplex*>(b_buffer + b_offset);
+  const auto result = cublasZtrsm(handle, side, triangle, a_transpose, diagonal,
+                                  static_cast<int>(m), static_cast<int>(n),
+                                  &cu_alpha, a_ptr, a_ld, b_ptr, b_ld);
+  cudaDeviceSynchronize();
+  return result;
+}
+
+// =================================================================================================
+} // namespace clblast
+
+// CLBLAST_TEST_WRAPPER_CUBLAS_H_
+#endif
diff --git a/test/wrapper_cuda.hpp b/test/wrapper_cuda.hpp
new file mode 100644
index 00000000..c97ae3ef
--- /dev/null
+++ b/test/wrapper_cuda.hpp
@@ -0,0 +1,149 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file contains all the CUDA related code; used only in case of testing against cuBLAS
+//
+// =================================================================================================
+
+#ifndef CLBLAST_TEST_WRAPPER_CUDA_H_
+#define CLBLAST_TEST_WRAPPER_CUDA_H_
+
+#include <string>
+#include <vector>
+#include <memory>
+#include <stdexcept>
+
+#include "utilities/utilities.hpp"
+
+#ifdef CLBLAST_REF_CUBLAS
+ #include <cuda_runtime.h>
+ #include <cublas_v2.h>
+#endif
+
+namespace clblast {
+// =================================================================================================
+
+#ifdef CLBLAST_REF_CUBLAS
+  // Selects the CUDA device given by args.device_id and creates a cuBLAS handle, which is
+  // stored in args.cublas_handle. Throws std::runtime_error if the handle cannot be created.
+  template <typename T>
+  void cublasSetup(Arguments<T> &args) {
+    const auto device_index = static_cast<int>(args.device_id);
+    cudaSetDevice(device_index);
+    auto* handle_slot = reinterpret_cast<cublasHandle_t*>(&args.cublas_handle);
+    if (cublasCreate(handle_slot) != CUBLAS_STATUS_SUCCESS) {
+      throw std::runtime_error("CUDA cublasCreate error");
+    }
+  }
+#endif
+
+#ifdef CLBLAST_REF_CUBLAS
+  // Destroys the cuBLAS handle stored in args.cublas_handle. Throws std::runtime_error if
+  // cuBLAS reports a failure.
+  template <typename T>
+  void cublasTeardown(Arguments<T> &args) {
+    const auto handle = reinterpret_cast<cublasHandle_t>(args.cublas_handle);
+    if (cublasDestroy(handle) != CUBLAS_STATUS_SUCCESS) {
+      throw std::runtime_error("CUDA cublasDestroy error");
+    }
+  }
+#endif
+
+// =================================================================================================
+
+// Downloads 'size' elements from the CUDA device into the host vector, then releases the device
+// allocation and resets the device pointer to null. Without cuBLAS support this is a no-op.
+#ifdef CLBLAST_REF_CUBLAS
+  template <typename T>
+  void CUDAToHost(T** buffer_cuda, std::vector<T> &buffer_host, const size_t size) {
+    const auto num_bytes = size*sizeof(T);
+    const auto copy_status = cudaMemcpy(reinterpret_cast<void*>(buffer_host.data()),
+                                        reinterpret_cast<void*>(*buffer_cuda),
+                                        num_bytes, cudaMemcpyDeviceToHost);
+    if (copy_status != cudaSuccess) {
+      throw std::runtime_error("CUDA cudaMemcpy error with status: "+ToString(static_cast<int>(copy_status)));
+    }
+    const auto free_status = cudaFree(*buffer_cuda);
+    if (free_status != cudaSuccess) {
+      throw std::runtime_error("CUDA cudaFree error with status: "+ToString(static_cast<int>(free_status)));
+    }
+    // Mark the buffer as released so that a later upload allocates it again
+    *buffer_cuda = nullptr;
+  }
+#else
+  template <typename T> void CUDAToHost(T**, const std::vector<T>&, const size_t) { }
+#endif
+
+// Uploads 'size' elements of the host vector to the CUDA device. Device memory is allocated
+// only when the device pointer is still null. Without cuBLAS support this is a no-op.
+#ifdef CLBLAST_REF_CUBLAS
+  template <typename T>
+  void HostToCUDA(T** buffer_cuda, std::vector<T> &buffer_host, const size_t size) {
+    const auto num_bytes = size*sizeof(T);
+    if (*buffer_cuda == nullptr) {
+      const auto alloc_status = cudaMalloc(reinterpret_cast<void**>(buffer_cuda), num_bytes);
+      if (alloc_status != cudaSuccess) {
+        throw std::runtime_error("CUDA cudaMalloc error with status: "+ToString(static_cast<int>(alloc_status)));
+      }
+    }
+    const auto copy_status = cudaMemcpy(reinterpret_cast<void*>(*buffer_cuda),
+                                        reinterpret_cast<void*>(buffer_host.data()),
+                                        num_bytes, cudaMemcpyHostToDevice);
+    if (copy_status != cudaSuccess) {
+      throw std::runtime_error("CUDA cudaMemcpy error with status: "+ToString(static_cast<int>(copy_status)));
+    }
+  }
+#else
+  template <typename T> void HostToCUDA(T**, const std::vector<T>&, const size_t) { }
+#endif
+
+// =================================================================================================
+
+// Bundle of raw pointers, one per named test buffer, used for the CUDA side of a test.
+// Every pointer starts out null; the upload helper allocates only when it finds a null pointer.
+template <typename T>
+struct BuffersCUDA {
+  T* x_vec{nullptr};
+  T* y_vec{nullptr};
+  T* a_mat{nullptr};
+  T* b_mat{nullptr};
+  T* c_mat{nullptr};
+  T* ap_mat{nullptr};
+  T* scalar{nullptr};
+};
+
+// Downloads each buffer listed in 'names' from the device into a freshly zero-filled host
+// vector of the size given in 'args'. Throws std::runtime_error on an unknown buffer name.
+template <typename T, typename U>
+void CUDAToHost(const Arguments<U> &args, BuffersCUDA<T> &buffers, BuffersHost<T> &buffers_host,
+                const std::vector<std::string> &names) {
+  // Re-creates the host vector at the requested size before copying the device data into it
+  const auto download = [](T* &device, std::vector<T> &host, const size_t count) {
+    host = std::vector<T>(count, static_cast<T>(0));
+    CUDAToHost(&device, host, count);
+  };
+  for (const auto &buffer_name: names) {
+    if (buffer_name == kBufVecX) { download(buffers.x_vec, buffers_host.x_vec, args.x_size); continue; }
+    if (buffer_name == kBufVecY) { download(buffers.y_vec, buffers_host.y_vec, args.y_size); continue; }
+    if (buffer_name == kBufMatA) { download(buffers.a_mat, buffers_host.a_mat, args.a_size); continue; }
+    if (buffer_name == kBufMatB) { download(buffers.b_mat, buffers_host.b_mat, args.b_size); continue; }
+    if (buffer_name == kBufMatC) { download(buffers.c_mat, buffers_host.c_mat, args.c_size); continue; }
+    if (buffer_name == kBufMatAP) { download(buffers.ap_mat, buffers_host.ap_mat, args.ap_size); continue; }
+    if (buffer_name == kBufScalar) { download(buffers.scalar, buffers_host.scalar, args.scalar_size); continue; }
+    throw std::runtime_error("Invalid buffer name");
+  }
+}
+
+// Uploads each buffer listed in 'names' from its host vector to the device, using the sizes
+// given in 'args'. Throws std::runtime_error on an unknown buffer name.
+template <typename T, typename U>
+void HostToCUDA(const Arguments<U> &args, BuffersCUDA<T> &buffers, BuffersHost<T> &buffers_host,
+                const std::vector<std::string> &names) {
+  for (const auto &buffer_name: names) {
+    if (buffer_name == kBufVecX) { HostToCUDA(&buffers.x_vec, buffers_host.x_vec, args.x_size); continue; }
+    if (buffer_name == kBufVecY) { HostToCUDA(&buffers.y_vec, buffers_host.y_vec, args.y_size); continue; }
+    if (buffer_name == kBufMatA) { HostToCUDA(&buffers.a_mat, buffers_host.a_mat, args.a_size); continue; }
+    if (buffer_name == kBufMatB) { HostToCUDA(&buffers.b_mat, buffers_host.b_mat, args.b_size); continue; }
+    if (buffer_name == kBufMatC) { HostToCUDA(&buffers.c_mat, buffers_host.c_mat, args.c_size); continue; }
+    if (buffer_name == kBufMatAP) { HostToCUDA(&buffers.ap_mat, buffers_host.ap_mat, args.ap_size); continue; }
+    if (buffer_name == kBufScalar) { HostToCUDA(&buffers.scalar, buffers_host.scalar, args.scalar_size); continue; }
+    throw std::runtime_error("Invalid buffer name");
+  }
+}
+
+// =================================================================================================
+} // namespace clblast
+
+// CLBLAST_TEST_WRAPPER_CUDA_H_
+#endif