diff options
162 files changed, 3514 insertions, 1134 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 62cf00cc..0fb04071 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -129,18 +129,19 @@ if(TUNERS) endif() endif() -# Locates the reference BLAS libraries in case the tests need to be compiled. The "FindclBLAS.cmake" -# and "FindCBLAS.cmake" are included. +# Locates the reference BLAS libraries in case the tests need to be compiled. The "FindclBLAS.cmake", +# "FindCBLAS.cmake" and "FindcuBLAS.cmake" are included. if(CLIENTS OR TESTS) find_package(clBLAS) find_package(CBLAS) - if(NOT CLBLAS_FOUND AND NOT CBLAS_FOUND) + find_package(cuBLAS) + if(NOT CLBLAS_FOUND AND NOT CBLAS_FOUND AND NOT CUBLAS_FOUND) if(TESTS) - message(STATUS "Could NOT find clBLAS nor a CPU BLAS, disabling the compilation of the tests") + message(STATUS "Could NOT find clBLAS nor a CPU BLAS nor cuBLAS, disabling the compilation of the tests") set(TESTS OFF) endif() if(CLIENTS) - message(STATUS "Could NOT find clBLAS nor a CPU BLAS, head-to-head performance comparison not supported in the clients") + message(STATUS "Could NOT find clBLAS nor a CPU BLAS nor cuBLAS, head-to-head performance comparison not supported in the clients") endif() endif() endif() @@ -320,13 +321,22 @@ if(CLIENTS OR TESTS) add_definitions(" -DCLBLAST_REF_CBLAS") endif() endif() + if(CUBLAS_FOUND) + set(REF_INCLUDES ${REF_INCLUDES} ${CUDA_INCLUDE_DIRS} ${CUBLAS_INCLUDE_DIRS}) + set(REF_LIBRARIES ${REF_LIBRARIES} ${CUDA_LIBRARIES} ${CUBLAS_LIBRARIES}) + if(MSVC) + add_definitions(" /DCLBLAST_REF_CUBLAS") + else() + add_definitions(" -DCLBLAST_REF_CUBLAS") + endif() + endif() endif() # ================================================================================================== # Section for the performance tests (i.e. the client). These compare against optionally a reference -# library, either clBLAS or a CPU BLAS. +# library, either clBLAS, a CPU BLAS, or CUDA's cuBLAS. 
if(CLIENTS) # Visual Studio requires the sources of non-exported objects/libraries @@ -372,7 +382,7 @@ endif() # ================================================================================================== # Section for the correctness tests. Note that these tests require the presence of clBLAS and/or a -# CPU BLAS library to act as a reference. +# CPU BLAS library, and/or cuBLAS to act as a reference. if(TESTS) enable_testing() diff --git a/cmake/Modules/FindcuBLAS.cmake b/cmake/Modules/FindcuBLAS.cmake new file mode 100644 index 00000000..e470289b --- /dev/null +++ b/cmake/Modules/FindcuBLAS.cmake @@ -0,0 +1,82 @@ + +# ================================================================================================== +# This file is part of the cuBLASt project. The project is licensed under Apache Version 2.0. This +# project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +# width of 100 characters per line. +# +# Author(s): +# Cedric Nugteren <www.cedricnugteren.nl> +# +# ================================================================================================== +# +# Defines the following variables: +# CUBLAS_FOUND Boolean holding whether or not the cuBLAS library was found +# CUBLAS_INCLUDE_DIRS The CUDA and cuBLAS include directory +# CUDA_LIBRARIES The CUDA library +# CUBLAS_LIBRARIES The cuBLAS library +# +# In case CUDA is not installed in the default directory, set the CUDA_ROOT variable to point to +# the root of cuBLAS, such that 'cublas_v2.h' can be found in $CUDA_ROOT/include. This can either be +# done using an environmental variable (e.g. export CUDA_ROOT=/path/to/cuBLAS) or using a CMake +# variable (e.g. cmake -DCUDA_ROOT=/path/to/cuBLAS ..). 
+# +# ================================================================================================== + +# Sets the possible install locations +set(CUBLAS_HINTS + ${CUDA_ROOT} + $ENV{CUDA_ROOT} + $ENV{CUDA_TOOLKIT_ROOT_DIR} +) +set(CUBLAS_PATHS + /usr + /usr/local + /usr/local/cuda +) + +# Finds the include directories +find_path(CUBLAS_INCLUDE_DIRS + NAMES cublas_v2.h cuda.h + HINTS ${CUBLAS_HINTS} + PATH_SUFFIXES include inc include/x86_64 include/x64 + PATHS ${CUBLAS_PATHS} + DOC "cuBLAS include header cublas_v2.h" +) +mark_as_advanced(CUBLAS_INCLUDE_DIRS) + +# Finds the libraries +find_library(CUDA_LIBRARIES + NAMES cudart + HINTS ${CUBLAS_HINTS} + PATH_SUFFIXES lib lib64 lib/x86_64 lib/x64 lib/x86 lib/Win32 lib/import lib64/import + PATHS ${CUBLAS_PATHS} + DOC "CUDA library" +) +mark_as_advanced(CUDA_LIBRARIES) +find_library(CUBLAS_LIBRARIES + NAMES cublas + HINTS ${CUBLAS_HINTS} + PATH_SUFFIXES lib lib64 lib/x86_64 lib/x64 lib/x86 lib/Win32 lib/import lib64/import + PATHS ${CUBLAS_PATHS} + DOC "cuBLAS library" +) +mark_as_advanced(CUBLAS_LIBRARIES) + +# ================================================================================================== + +# Notification messages +if(NOT CUBLAS_INCLUDE_DIRS) + message(STATUS "Could NOT find 'cublas_v2.h', install CUDA/cuBLAS or set CUDA_ROOT") +endif() +if(NOT CUDA_LIBRARIES) + message(STATUS "Could NOT find CUDA library, install it or set CUDA_ROOT") +endif() +if(NOT CUBLAS_LIBRARIES) + message(STATUS "Could NOT find cuBLAS library, install it or set CUDA_ROOT") +endif() + +# Determines whether or not cuBLAS was found +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(cuBLAS DEFAULT_MSG CUBLAS_INCLUDE_DIRS CUDA_LIBRARIES CUBLAS_LIBRARIES) + +# ================================================================================================== diff --git a/scripts/generator/generator.py b/scripts/generator/generator.py index 6ec67052..f7ef4528 100755 --- a/scripts/generator/generator.py +++ 
b/scripts/generator/generator.py @@ -38,11 +38,12 @@ FILES = [ "/src/clblast_c.cpp", "/test/wrapper_clblas.hpp", "/test/wrapper_cblas.hpp", + "/test/wrapper_cublas.hpp", "/include/clblast_netlib_c.h", "/src/clblast_netlib_c.cpp", ] -HEADER_LINES = [123, 76, 126, 23, 29, 41, 65, 32] -FOOTER_LINES = [25, 139, 27, 38, 6, 6, 9, 2] +HEADER_LINES = [122, 77, 126, 23, 29, 41, 29, 65, 32] +FOOTER_LINES = [25, 139, 27, 38, 6, 6, 6, 9, 2] HEADER_LINES_DOC = 0 FOOTER_LINES_DOC = 63 @@ -194,7 +195,7 @@ def main(argv): # Re-writes the body of the file with open(library_root + FILES[i], "w") as f: body = "" - levels = [1, 2, 3] if (i == 4 or i == 5) else [1, 2, 3, 4] + levels = [1, 2, 3] if (i == 4 or i == 5 or i == 6) else [1, 2, 3, 4] for level in levels: body += cpp.LEVEL_SEPARATORS[level - 1] + "\n" for routine in ROUTINES[level - 1]: @@ -211,9 +212,11 @@ def main(argv): if i == 5: body += cpp.wrapper_cblas(routine) if i == 6: + body += cpp.wrapper_cublas(routine) + if i == 7: if not routine.batched: body += cpp.clblast_netlib_c_h(routine) - if i == 7: + if i == 8: if not routine.batched: body += cpp.clblast_netlib_c_cc(routine) f.write("".join(file_header)) diff --git a/scripts/generator/generator/convert.py b/scripts/generator/generator/convert.py index c0309ec3..07f45669 100644 --- a/scripts/generator/generator/convert.py +++ b/scripts/generator/generator/convert.py @@ -56,6 +56,19 @@ def option_to_cblas(x): }[x] +def option_to_cublas(x): + """As above, but for cuBLAS data-types""" + return { + 'layout': "Layout", + 'a_transpose': "cublasOperation_t", + 'b_transpose': "cublasOperation_t", + 'ab_transpose': "cublasOperation_t", + 'side': "cublasSideMode_t", + 'triangle': "cublasFillMode_t", + 'diagonal': "cublasDiagType_t", + }[x] + + def option_to_documentation(x): """Translates an option name to a documentation string""" return { diff --git a/scripts/generator/generator/cpp.py b/scripts/generator/generator/cpp.py index 91fdf458..7c695dc8 100644 --- 
a/scripts/generator/generator/cpp.py +++ b/scripts/generator/generator/cpp.py @@ -290,14 +290,72 @@ def wrapper_cblas(routine): return result +def wrapper_cublas(routine): + """The wrapper to the reference cuBLAS routines (for performance/correctness testing)""" + result = "" + if routine.has_tests: + result += NL + "// Forwards the cuBLAS calls for %s" % routine.short_names_tested() + NL + if routine.no_scalars(): + result += routine.routine_header_wrapper_cublas(routine.template, True, 23) + ";" + NL + for flavour in routine.flavours: + result += routine.routine_header_wrapper_cublas(flavour, False, 23) + " {" + NL + + # There is a version available in cuBLAS + if flavour.precision_name in ["S", "D", "C", "Z"]: + indent = " " * (24 + routine.length()) + arguments = routine.arguments_wrapper_cublas(flavour) + + # Handles row-major + if routine.has_layout(): + result += " if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }" + NL + + # Complex scalars + for scalar in routine.scalars: + if flavour.is_complex(scalar): + cuda_complex = "cuDoubleComplex" if flavour.precision_name == "Z" else "cuComplex" + result += " " + cuda_complex + " " + scalar + "_cuda;" + NL + result += " " + scalar + "_cuda.x = " + scalar + ".real();" + NL + result += " " + scalar + "_cuda.y = " + scalar + ".imag();" + NL + + # Creates the cuBLAS handle (it must be initialised before use) and calls the cuBLAS routine + result += " cublasHandle_t handle;" + NL + result += " if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; }" + NL + result += " auto status = cublas" + flavour.name_cublas() + routine.name + "(handle, " + result += ("," + NL + indent).join([a for a in arguments]) + ");" + NL + result += " cublasDestroy(handle);" + NL + result += " return status;" + + # There is no cuBLAS available, forward the call to one of the available functions + else: # Half-precision + result += " return CUBLAS_STATUS_NOT_SUPPORTED;" + # indent = " " * (24 + routine.length()) + + # # Convert to float (note: also integer buffers are stored as half/float) + # for buf in routine.inputs + routine.outputs: + # result += " auto " 
+ buf + "_buffer_bis = HalfToFloatBuffer(" + buf + "_buffer, queues[0]);" + NL + + # # Call the float routine + # result += " cublasHandle_t handle;" + NL + # result += " auto status = cublasX" + routine.name + "(handle," + # result += ("," + NL + indent).join([a for a in routine.arguments_half()]) + ");" + NL + # result += " cublasDestroy(handle);" + NL + # result += " return status;" + NL + + # # Convert back to half + # for buf in routine.outputs: + # result += " FloatToHalfBuffer(" + buf + "_buffer, " + buf + "_buffer_bis, queues[0]);" + NL + # result += " return status;" + + # Complete + result += NL + "}" + NL + return result + + def performance_test(routine, level_string): """Generates the body of a performance test for a specific routine""" result = "" result += "#include \"test/performance/client.hpp\"" + NL result += "#include \"test/routines/level" + level_string + "/x" + routine.lowercase_name() + ".hpp\"" + NL + NL - result += "// Shortcuts to the clblast namespace" + NL - result += "using float2 = clblast::float2;" + NL - result += "using double2 = clblast::double2;" + NL + NL result += "// Main function (not within the clblast namespace)" + NL result += "int main(int argc, char *argv[]) {" + NL result += " const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);" + NL @@ -324,9 +382,6 @@ def correctness_test(routine, level_string): result = "" result += "#include \"test/correctness/testblas.hpp\"" + NL result += "#include \"test/routines/level" + level_string + "/x" + routine.lowercase_name() + ".hpp\"" + NL + NL - result += "// Shortcuts to the clblast namespace" + NL - result += "using float2 = clblast::float2;" + NL - result += "using double2 = clblast::double2;" + NL + NL result += "// Main function (not within the clblast namespace)" + NL result += "int main(int argc, char *argv[]) {" + NL result += " auto errors = size_t{0};" + NL diff --git a/scripts/generator/generator/datatype.py 
b/scripts/generator/generator/datatype.py index cfdbf748..6ac5681a 100644 --- a/scripts/generator/generator/datatype.py +++ b/scripts/generator/generator/datatype.py @@ -72,9 +72,11 @@ class DataType: def test_template(self): """Returns the template as used in the correctness/performance tests""" + buffer_type = "clblast::" + self.buffer_type if self.buffer_type in [D_FLOAT2, D_DOUBLE2] else self.buffer_type + beta_cpp = "clblast::" + self.beta_cpp if self.beta_cpp in [D_FLOAT2, D_DOUBLE2] else self.beta_cpp if self.buffer_type != self.beta_cpp: - return "<" + self.buffer_type + "," + self.beta_cpp + ">, " + self.buffer_type + ", " + self.beta_cpp - return "<" + self.buffer_type + ">, " + self.buffer_type + ", " + self.beta_cpp + return "<" + buffer_type + "," + self.beta_cpp + ">, " + buffer_type + ", " + beta_cpp + return "<" + buffer_type + ">, " + buffer_type + ", " + beta_cpp def is_complex(self, scalar): """Current scalar is complex""" @@ -85,6 +87,11 @@ class DataType: """Current type is of a non-standard type""" return self.buffer_type in [D_HALF, D_FLOAT2, D_DOUBLE2] + def name_cublas(self): + if "i" in self.name: + return "I" + self.name[1].lower() + return self.name + # Regular data-types H = DataType("H", "H", D_HALF, [D_HALF] * 2 + [D_HALF_OPENCL] * 2, D_HALF) # half (16) diff --git a/scripts/generator/generator/routine.py b/scripts/generator/generator/routine.py index 59b2ed73..b1db484f 100644 --- a/scripts/generator/generator/routine.py +++ b/scripts/generator/generator/routine.py @@ -197,6 +197,10 @@ class Routine: """Determines whether or not this routine has scalar arguments (alpha/beta)""" return self.scalars == [] + def has_layout(self): + """Determines whether the layout is an argument""" + return "layout" in self.options + def short_names(self): """Returns the upper-case names of these routines (all flavours)""" return "/".join([f.name + self.upper_name() for f in self.flavours]) @@ -257,7 +261,7 @@ class Routine: return [] def 
buffer_def_wrapper_cl(self, name, flavour): - """As above but with data-types""" + """As above but for OpenCL""" prefix = "const " if name in self.inputs else "" if name in self.inputs or name in self.outputs: a = [prefix + "Buffer<" + flavour.buffer_type + ">& " + name + "_buffer"] @@ -266,6 +270,16 @@ class Routine: return [", ".join(a + b + c)] return [] + def buffer_def_wrapper_cuda(self, name, flavour): + """As above but for CUDA""" + prefix = "const " if name in self.inputs else "" + if name in self.inputs or name in self.outputs: + a = [prefix + flavour.buffer_type + "* " + name + "_buffer"] + b = ["const size_t " + name + "_offset"] + c = ["const size_t " + name + "_" + self.postfix(name)] if name not in self.buffers_without_ld_inc() else [] + return [", ".join(a + b + c)] + return [] + def buffer_def_vector(self, name, flavour): """As above but as vectors""" prefix = "const " if name in self.inputs else "" @@ -329,6 +343,24 @@ class Routine: return [", ".join(a + c)] return [] + def buffer_wrapper_cublas(self, name, flavour): + """As above but for cuBLAS the wrapper""" + prefix = "const " if name in self.inputs else "" + if name in self.inputs or name in self.outputs: + if flavour.precision_name in ["C", "Z"]: + cuda_complex = "cuDoubleComplex" if flavour.precision_name == "Z" else "cuComplex" + a = ["reinterpret_cast<" + prefix + cuda_complex + "*>" + + "(&" + name + "_buffer[" + name + "_offset])"] + else: + a = ["&" + name + "_buffer[" + name + "_offset]"] + c = [] + if name in ["x", "y"]: + c = ["static_cast<int>(" + name + "_" + self.postfix(name) + ")"] + elif name in ["a", "b", "c"]: + c = [name + "_" + self.postfix(name)] + return [", ".join(a + c)] + return [] + def buffer_type(self, name): """As above, but only data-types""" prefix = "const " if (name in self.inputs) else "" @@ -407,6 +439,14 @@ class Routine: return [name] return [] + def scalar_use_wrapper_cublas(self, name, flavour): + """As above, but for the cuBLAS wrapper""" + if name in 
self.scalars: + if flavour.is_complex(name): + return ["&" + name + "_cuda"] + return ["&" + name] + return [] + def scalar_def(self, name, flavour): """Retrieves the definition of a scalar (alpha/beta)""" if name in self.scalars: @@ -465,6 +505,12 @@ class Routine: return [", ".join([s for s in self.sizes])] return [] + def sizes_list_as_int(self): + """Retrieves a list of comma-separated sizes (m, n, k) cast to integers""" + if self.sizes: + return [", ".join(["static_cast<int>(" + s + ")" for s in self.sizes])] + return [] + def sizes_def(self): """Retrieves the definition of the sizes (m,n,k)""" if self.sizes: @@ -496,6 +542,15 @@ class Routine: return [", ".join(self.options)] return [] + def options_list_no_layout(self): + """Retrieves a list of options""" + options = self.options[:] + if "layout" in options: + options.remove("layout") + if options: + return [", ".join(options)] + return [] + def options_cast(self, indent): """As above, but now casted to CLBlast data-types""" if self.options: @@ -531,6 +586,13 @@ class Routine: return [", ".join(definitions)] return [] + def options_def_wrapper_cublas(self): + """As above, but now using cuBLAS data-types""" + if self.options: + definitions = ["const " + convert.option_to_cublas(o) + " " + o for o in self.options] + return [", ".join(definitions)] + return [] + def options_type(self): """Retrieves the types of the options (layout, transpose, side, etc.)""" if self.options: @@ -615,7 +677,7 @@ class Routine: def arguments_wrapper_cblas(self, flavour): """As above, but for the CBLAS wrapper""" - return (self.options_list() + self.sizes_list() + + return (self.options_list() + self.sizes_list_as_int() + self.scalar_use_wrapper_cblas("alpha", flavour) + list(chain(*[self.buffer_wrapper_cblas(b, flavour) for b in self.buffers_first()])) + self.scalar_use_wrapper_cblas("beta", flavour) + @@ -623,6 +685,17 @@ class Routine: list(chain(*[self.buffer_wrapper_cblas(b, flavour) for b in self.scalar_buffers_second()])) + 
list(chain(*[self.scalar_use_wrapper_cblas(s, flavour) for s in self.other_scalars()]))) + def arguments_wrapper_cublas(self, flavour): + """As above, but for the cuBLAS wrapper""" + return (self.options_list_no_layout() + self.sizes_list_as_int() + + self.scalar_use_wrapper_cublas("alpha", flavour) + + list(chain(*[self.buffer_wrapper_cublas(b, flavour) for b in self.buffers_first()])) + + self.scalar_use_wrapper_cublas("beta", flavour) + + list(chain(*[self.buffer_wrapper_cublas(b, flavour) for b in self.buffers_second()])) + + list(chain(*[self.buffer_wrapper_cublas(b, flavour) for b in self.scalar_buffers_first()])) + + list(chain(*[self.buffer_wrapper_cublas(b, flavour) for b in self.scalar_buffers_second()])) + + list(chain(*[self.scalar_use_wrapper_cublas(s, flavour) for s in self.other_scalars()]))) + def arguments_def(self, flavour): """Retrieves a combination of all the argument definitions""" return (self.options_def() + self.sizes_def() + @@ -683,6 +756,17 @@ class Routine: list(chain(*[self.buffer_def_vector(b, flavour) for b in self.scalar_buffers_second()])) + list(chain(*[self.scalar_def_plain(s, flavour) for s in self.other_scalars()]))) + def arguments_def_wrapper_cublas(self, flavour): + """As above, but cuBLAS wrapper plain data-types""" + return (self.options_def_wrapper_cublas() + self.sizes_def() + + list(chain(*[self.buffer_def_wrapper_cuda(b, flavour) for b in self.scalar_buffers_first()])) + + self.scalar_def_plain("alpha", flavour) + + list(chain(*[self.buffer_def_wrapper_cuda(b, flavour) for b in self.buffers_first()])) + + self.scalar_def_plain("beta", flavour) + + list(chain(*[self.buffer_def_wrapper_cuda(b, flavour) for b in self.buffers_second()])) + + list(chain(*[self.buffer_def_wrapper_cuda(b, flavour) for b in self.scalar_buffers_second()])) + + list(chain(*[self.scalar_def_plain(s, flavour) for s in self.other_scalars()]))) + def arguments_type(self, flavour): """Retrieves a combination of all the argument types""" return 
(self.options_type() + self.sizes_type() + @@ -781,3 +865,17 @@ class Routine: result = "void cblasX" + self.name + "(" result += (",\n" + indent).join([a for a in self.arguments_def_wrapper_cblas(flavour)]) + ")" return result + + def routine_header_wrapper_cublas(self, flavour, def_only, spaces): + """As above, but now for the cuBLAS wrapper""" + template = "<" + flavour.template + ">" if self.no_scalars() and not def_only else "" + indent = " " * (spaces + self.length() + len(template)) + result = "" + if self.no_scalars(): + result += "template <" + if def_only: + result += flavour.name + result += ">\n" + result += "cublasStatus_t cublasX" + self.name + template + "(" + result += (",\n" + indent).join([a for a in self.arguments_def_wrapper_cublas(flavour)]) + ")" + return result diff --git a/src/utilities/utilities.hpp b/src/utilities/utilities.hpp index 535560a3..7aadb983 100644 --- a/src/utilities/utilities.hpp +++ b/src/utilities/utilities.hpp @@ -81,6 +81,7 @@ constexpr auto kArgFraction = "fraction"; // The client-specific arguments in string form constexpr auto kArgCompareclblas = "clblas"; constexpr auto kArgComparecblas = "cblas"; +constexpr auto kArgComparecublas = "cublas"; constexpr auto kArgStepSize = "step"; constexpr auto kArgNumSteps = "num_steps"; constexpr auto kArgNumRuns = "runs"; @@ -188,6 +189,7 @@ struct Arguments { // Client-specific arguments int compare_clblas = 1; int compare_cblas = 1; + int compare_cublas = 1; size_t step = 1; size_t num_steps = 0; size_t num_runs = 10; diff --git a/test/correctness/routines/level1/xamax.cpp b/test/correctness/routines/level1/xamax.cpp index 607637e8..d940ae7a 100644 --- a/test/correctness/routines/level1/xamax.cpp +++ b/test/correctness/routines/level1/xamax.cpp @@ -12,17 +12,13 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level1/xamax.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within 
the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; errors += clblast::RunTests<clblast::TestXamax<float>, float, float>(argc, argv, false, "iSAMAX"); errors += clblast::RunTests<clblast::TestXamax<double>, double, double>(argc, argv, true, "iDAMAX"); - errors += clblast::RunTests<clblast::TestXamax<float2>, float2, float2>(argc, argv, true, "iCAMAX"); - errors += clblast::RunTests<clblast::TestXamax<double2>, double2, double2>(argc, argv, true, "iZAMAX"); + errors += clblast::RunTests<clblast::TestXamax<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "iCAMAX"); + errors += clblast::RunTests<clblast::TestXamax<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "iZAMAX"); errors += clblast::RunTests<clblast::TestXamax<half>, half, half>(argc, argv, true, "iHAMAX"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level1/xasum.cpp b/test/correctness/routines/level1/xasum.cpp index e22e42a6..b969d662 100644 --- a/test/correctness/routines/level1/xasum.cpp +++ b/test/correctness/routines/level1/xasum.cpp @@ -12,17 +12,13 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level1/xasum.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; errors += clblast::RunTests<clblast::TestXasum<float>, float, float>(argc, argv, false, "SASUM"); errors += clblast::RunTests<clblast::TestXasum<double>, double, double>(argc, argv, true, "DASUM"); - errors += clblast::RunTests<clblast::TestXasum<float2>, float2, float2>(argc, argv, true, "ScASUM"); - errors += clblast::RunTests<clblast::TestXasum<double2>, double2, double2>(argc, argv, true, "DzASUM"); + errors += clblast::RunTests<clblast::TestXasum<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "ScASUM"); + 
errors += clblast::RunTests<clblast::TestXasum<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "DzASUM"); errors += clblast::RunTests<clblast::TestXasum<half>, half, half>(argc, argv, true, "HASUM"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level1/xaxpy.cpp b/test/correctness/routines/level1/xaxpy.cpp index 064172fa..6f4f34fb 100644 --- a/test/correctness/routines/level1/xaxpy.cpp +++ b/test/correctness/routines/level1/xaxpy.cpp @@ -12,17 +12,13 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level1/xaxpy.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; errors += clblast::RunTests<clblast::TestXaxpy<float>, float, float>(argc, argv, false, "SAXPY"); errors += clblast::RunTests<clblast::TestXaxpy<double>, double, double>(argc, argv, true, "DAXPY"); - errors += clblast::RunTests<clblast::TestXaxpy<float2>, float2, float2>(argc, argv, true, "CAXPY"); - errors += clblast::RunTests<clblast::TestXaxpy<double2>, double2, double2>(argc, argv, true, "ZAXPY"); + errors += clblast::RunTests<clblast::TestXaxpy<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "CAXPY"); + errors += clblast::RunTests<clblast::TestXaxpy<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZAXPY"); errors += clblast::RunTests<clblast::TestXaxpy<half>, half, half>(argc, argv, true, "HAXPY"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level1/xcopy.cpp b/test/correctness/routines/level1/xcopy.cpp index e6f2581b..e6e94d34 100644 --- a/test/correctness/routines/level1/xcopy.cpp +++ b/test/correctness/routines/level1/xcopy.cpp @@ -12,17 +12,13 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level1/xcopy.hpp" -// Shortcuts to the 
clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; errors += clblast::RunTests<clblast::TestXcopy<float>, float, float>(argc, argv, false, "SCOPY"); errors += clblast::RunTests<clblast::TestXcopy<double>, double, double>(argc, argv, true, "DCOPY"); - errors += clblast::RunTests<clblast::TestXcopy<float2>, float2, float2>(argc, argv, true, "CCOPY"); - errors += clblast::RunTests<clblast::TestXcopy<double2>, double2, double2>(argc, argv, true, "ZCOPY"); + errors += clblast::RunTests<clblast::TestXcopy<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "CCOPY"); + errors += clblast::RunTests<clblast::TestXcopy<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZCOPY"); errors += clblast::RunTests<clblast::TestXcopy<half>, half, half>(argc, argv, true, "HCOPY"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level1/xdot.cpp b/test/correctness/routines/level1/xdot.cpp index 080250cb..8dccbf26 100644 --- a/test/correctness/routines/level1/xdot.cpp +++ b/test/correctness/routines/level1/xdot.cpp @@ -12,10 +12,6 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level1/xdot.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; diff --git a/test/correctness/routines/level1/xdotc.cpp b/test/correctness/routines/level1/xdotc.cpp index 2a7bbeca..59eedddc 100644 --- a/test/correctness/routines/level1/xdotc.cpp +++ b/test/correctness/routines/level1/xdotc.cpp @@ -12,15 +12,11 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level1/xdotc.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = 
clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; - errors += clblast::RunTests<clblast::TestXdotc<float2>, float2, float2>(argc, argv, false, "CDOTC"); - errors += clblast::RunTests<clblast::TestXdotc<double2>, double2, double2>(argc, argv, true, "ZDOTC"); + errors += clblast::RunTests<clblast::TestXdotc<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, false, "CDOTC"); + errors += clblast::RunTests<clblast::TestXdotc<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZDOTC"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level1/xdotu.cpp b/test/correctness/routines/level1/xdotu.cpp index 1047d021..4392326d 100644 --- a/test/correctness/routines/level1/xdotu.cpp +++ b/test/correctness/routines/level1/xdotu.cpp @@ -12,15 +12,11 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level1/xdotu.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; - errors += clblast::RunTests<clblast::TestXdotu<float2>, float2, float2>(argc, argv, false, "CDOTU"); - errors += clblast::RunTests<clblast::TestXdotu<double2>, double2, double2>(argc, argv, true, "ZDOTU"); + errors += clblast::RunTests<clblast::TestXdotu<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, false, "CDOTU"); + errors += clblast::RunTests<clblast::TestXdotu<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZDOTU"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level1/xnrm2.cpp b/test/correctness/routines/level1/xnrm2.cpp index 142fa7ba..46ca1526 100644 --- a/test/correctness/routines/level1/xnrm2.cpp +++ b/test/correctness/routines/level1/xnrm2.cpp @@ -12,17 +12,13 @@ #include 
"test/correctness/testblas.hpp" #include "test/routines/level1/xnrm2.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; errors += clblast::RunTests<clblast::TestXnrm2<float>, float, float>(argc, argv, false, "SNRM2"); errors += clblast::RunTests<clblast::TestXnrm2<double>, double, double>(argc, argv, true, "DNRM2"); - errors += clblast::RunTests<clblast::TestXnrm2<float2>, float2, float2>(argc, argv, true, "ScNRM2"); - errors += clblast::RunTests<clblast::TestXnrm2<double2>, double2, double2>(argc, argv, true, "DzNRM2"); + errors += clblast::RunTests<clblast::TestXnrm2<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "ScNRM2"); + errors += clblast::RunTests<clblast::TestXnrm2<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "DzNRM2"); errors += clblast::RunTests<clblast::TestXnrm2<half>, half, half>(argc, argv, true, "HNRM2"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level1/xrot.cpp b/test/correctness/routines/level1/xrot.cpp index 5af358eb..d5eb6516 100644 --- a/test/correctness/routines/level1/xrot.cpp +++ b/test/correctness/routines/level1/xrot.cpp @@ -12,10 +12,6 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level1/xrot.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; diff --git a/test/correctness/routines/level1/xrotg.cpp b/test/correctness/routines/level1/xrotg.cpp index ad23a554..ec544eab 100644 --- a/test/correctness/routines/level1/xrotg.cpp +++ b/test/correctness/routines/level1/xrotg.cpp @@ -12,10 +12,6 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level1/xrotg.hpp" -// 
Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; diff --git a/test/correctness/routines/level1/xrotm.cpp b/test/correctness/routines/level1/xrotm.cpp index 4f7e8f15..7f2d7ce6 100644 --- a/test/correctness/routines/level1/xrotm.cpp +++ b/test/correctness/routines/level1/xrotm.cpp @@ -12,10 +12,6 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level1/xrotm.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; diff --git a/test/correctness/routines/level1/xrotmg.cpp b/test/correctness/routines/level1/xrotmg.cpp index ca89bc12..4ef6e67d 100644 --- a/test/correctness/routines/level1/xrotmg.cpp +++ b/test/correctness/routines/level1/xrotmg.cpp @@ -12,10 +12,6 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level1/xrotmg.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; diff --git a/test/correctness/routines/level1/xscal.cpp b/test/correctness/routines/level1/xscal.cpp index 939524be..c9788142 100644 --- a/test/correctness/routines/level1/xscal.cpp +++ b/test/correctness/routines/level1/xscal.cpp @@ -12,17 +12,13 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level1/xscal.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; errors += clblast::RunTests<clblast::TestXscal<float>, float, float>(argc, argv, false, "SSCAL"); errors += 
clblast::RunTests<clblast::TestXscal<double>, double, double>(argc, argv, true, "DSCAL"); - errors += clblast::RunTests<clblast::TestXscal<float2>, float2, float2>(argc, argv, true, "CSCAL"); - errors += clblast::RunTests<clblast::TestXscal<double2>, double2, double2>(argc, argv, true, "ZSCAL"); + errors += clblast::RunTests<clblast::TestXscal<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "CSCAL"); + errors += clblast::RunTests<clblast::TestXscal<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZSCAL"); errors += clblast::RunTests<clblast::TestXscal<half>, half, half>(argc, argv, true, "HSCAL"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level1/xswap.cpp b/test/correctness/routines/level1/xswap.cpp index 446f3d65..ee694a08 100644 --- a/test/correctness/routines/level1/xswap.cpp +++ b/test/correctness/routines/level1/xswap.cpp @@ -12,17 +12,13 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level1/xswap.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; errors += clblast::RunTests<clblast::TestXswap<float>, float, float>(argc, argv, false, "SSWAP"); errors += clblast::RunTests<clblast::TestXswap<double>, double, double>(argc, argv, true, "DSWAP"); - errors += clblast::RunTests<clblast::TestXswap<float2>, float2, float2>(argc, argv, true, "CSWAP"); - errors += clblast::RunTests<clblast::TestXswap<double2>, double2, double2>(argc, argv, true, "ZSWAP"); + errors += clblast::RunTests<clblast::TestXswap<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "CSWAP"); + errors += clblast::RunTests<clblast::TestXswap<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZSWAP"); errors += clblast::RunTests<clblast::TestXswap<half>, half, half>(argc, 
argv, true, "HSWAP"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level2/xgbmv.cpp b/test/correctness/routines/level2/xgbmv.cpp index 8c49bc65..6aac283b 100644 --- a/test/correctness/routines/level2/xgbmv.cpp +++ b/test/correctness/routines/level2/xgbmv.cpp @@ -12,17 +12,13 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level2/xgbmv.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; errors += clblast::RunTests<clblast::TestXgbmv<float>, float, float>(argc, argv, false, "SGBMV"); errors += clblast::RunTests<clblast::TestXgbmv<double>, double, double>(argc, argv, true, "DGBMV"); - errors += clblast::RunTests<clblast::TestXgbmv<float2>, float2, float2>(argc, argv, true, "CGBMV"); - errors += clblast::RunTests<clblast::TestXgbmv<double2>, double2, double2>(argc, argv, true, "ZGBMV"); + errors += clblast::RunTests<clblast::TestXgbmv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "CGBMV"); + errors += clblast::RunTests<clblast::TestXgbmv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZGBMV"); errors += clblast::RunTests<clblast::TestXgbmv<half>, half, half>(argc, argv, true, "HGBMV"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level2/xgemv.cpp b/test/correctness/routines/level2/xgemv.cpp index 902ae777..66994b89 100644 --- a/test/correctness/routines/level2/xgemv.cpp +++ b/test/correctness/routines/level2/xgemv.cpp @@ -12,17 +12,13 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level2/xgemv.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; 
errors += clblast::RunTests<clblast::TestXgemv<float>, float, float>(argc, argv, false, "SGEMV"); errors += clblast::RunTests<clblast::TestXgemv<double>, double, double>(argc, argv, true, "DGEMV"); - errors += clblast::RunTests<clblast::TestXgemv<float2>, float2, float2>(argc, argv, true, "CGEMV"); - errors += clblast::RunTests<clblast::TestXgemv<double2>, double2, double2>(argc, argv, true, "ZGEMV"); + errors += clblast::RunTests<clblast::TestXgemv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "CGEMV"); + errors += clblast::RunTests<clblast::TestXgemv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZGEMV"); errors += clblast::RunTests<clblast::TestXgemv<half>, half, half>(argc, argv, true, "HGEMV"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level2/xger.cpp b/test/correctness/routines/level2/xger.cpp index ce61bbcb..3b5d16e9 100644 --- a/test/correctness/routines/level2/xger.cpp +++ b/test/correctness/routines/level2/xger.cpp @@ -12,10 +12,6 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level2/xger.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; diff --git a/test/correctness/routines/level2/xgerc.cpp b/test/correctness/routines/level2/xgerc.cpp index b747f20d..42f6bb45 100644 --- a/test/correctness/routines/level2/xgerc.cpp +++ b/test/correctness/routines/level2/xgerc.cpp @@ -12,15 +12,11 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level2/xgerc.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; - errors += clblast::RunTests<clblast::TestXgerc<float2>, float2, float2>(argc, 
argv, false, "CGERC"); - errors += clblast::RunTests<clblast::TestXgerc<double2>, double2, double2>(argc, argv, true, "ZGERC"); + errors += clblast::RunTests<clblast::TestXgerc<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, false, "CGERC"); + errors += clblast::RunTests<clblast::TestXgerc<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZGERC"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level2/xgeru.cpp b/test/correctness/routines/level2/xgeru.cpp index f80c1e2b..f167eff5 100644 --- a/test/correctness/routines/level2/xgeru.cpp +++ b/test/correctness/routines/level2/xgeru.cpp @@ -12,15 +12,11 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level2/xgeru.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; - errors += clblast::RunTests<clblast::TestXgeru<float2>, float2, float2>(argc, argv, false, "CGERU"); - errors += clblast::RunTests<clblast::TestXgeru<double2>, double2, double2>(argc, argv, true, "ZGERU"); + errors += clblast::RunTests<clblast::TestXgeru<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, false, "CGERU"); + errors += clblast::RunTests<clblast::TestXgeru<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZGERU"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level2/xhbmv.cpp b/test/correctness/routines/level2/xhbmv.cpp index a4885c01..168d9474 100644 --- a/test/correctness/routines/level2/xhbmv.cpp +++ b/test/correctness/routines/level2/xhbmv.cpp @@ -12,15 +12,11 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level2/xhbmv.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the 
clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; - errors += clblast::RunTests<clblast::TestXhbmv<float2>, float2, float2>(argc, argv, false, "CHBMV"); - errors += clblast::RunTests<clblast::TestXhbmv<double2>, double2, double2>(argc, argv, true, "ZHBMV"); + errors += clblast::RunTests<clblast::TestXhbmv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, false, "CHBMV"); + errors += clblast::RunTests<clblast::TestXhbmv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZHBMV"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level2/xhemv.cpp b/test/correctness/routines/level2/xhemv.cpp index 4318ffee..eabdf67d 100644 --- a/test/correctness/routines/level2/xhemv.cpp +++ b/test/correctness/routines/level2/xhemv.cpp @@ -12,15 +12,11 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level2/xhemv.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; - errors += clblast::RunTests<clblast::TestXhemv<float2>, float2, float2>(argc, argv, false, "CHEMV"); - errors += clblast::RunTests<clblast::TestXhemv<double2>, double2, double2>(argc, argv, true, "ZHEMV"); + errors += clblast::RunTests<clblast::TestXhemv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, false, "CHEMV"); + errors += clblast::RunTests<clblast::TestXhemv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZHEMV"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level2/xher.cpp b/test/correctness/routines/level2/xher.cpp index fe37bd76..a47a45ac 100644 --- a/test/correctness/routines/level2/xher.cpp +++ b/test/correctness/routines/level2/xher.cpp @@ -12,15 +12,11 @@ #include "test/correctness/testblas.hpp" #include 
"test/routines/level2/xher.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; - errors += clblast::RunTests<clblast::TestXher<float2,float>, float2, float>(argc, argv, false, "CHER"); - errors += clblast::RunTests<clblast::TestXher<double2,double>, double2, double>(argc, argv, true, "ZHER"); + errors += clblast::RunTests<clblast::TestXher<clblast::float2,float>, clblast::float2, float>(argc, argv, false, "CHER"); + errors += clblast::RunTests<clblast::TestXher<clblast::double2,double>, clblast::double2, double>(argc, argv, true, "ZHER"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level2/xher2.cpp b/test/correctness/routines/level2/xher2.cpp index 0b4af4d0..544ab16d 100644 --- a/test/correctness/routines/level2/xher2.cpp +++ b/test/correctness/routines/level2/xher2.cpp @@ -12,15 +12,11 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level2/xher2.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; - errors += clblast::RunTests<clblast::TestXher2<float2>, float2, float2>(argc, argv, false, "CHER2"); - errors += clblast::RunTests<clblast::TestXher2<double2>, double2, double2>(argc, argv, true, "ZHER2"); + errors += clblast::RunTests<clblast::TestXher2<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, false, "CHER2"); + errors += clblast::RunTests<clblast::TestXher2<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZHER2"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level2/xhpmv.cpp b/test/correctness/routines/level2/xhpmv.cpp index dd77df71..30d23b8f 100644 --- 
a/test/correctness/routines/level2/xhpmv.cpp +++ b/test/correctness/routines/level2/xhpmv.cpp @@ -12,15 +12,11 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level2/xhpmv.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; - errors += clblast::RunTests<clblast::TestXhpmv<float2>, float2, float2>(argc, argv, false, "CHPMV"); - errors += clblast::RunTests<clblast::TestXhpmv<double2>, double2, double2>(argc, argv, true, "ZHPMV"); + errors += clblast::RunTests<clblast::TestXhpmv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, false, "CHPMV"); + errors += clblast::RunTests<clblast::TestXhpmv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZHPMV"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level2/xhpr.cpp b/test/correctness/routines/level2/xhpr.cpp index 5a3f615f..ed876857 100644 --- a/test/correctness/routines/level2/xhpr.cpp +++ b/test/correctness/routines/level2/xhpr.cpp @@ -12,15 +12,11 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level2/xhpr.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; - errors += clblast::RunTests<clblast::TestXhpr<float2,float>, float2, float>(argc, argv, false, "CHPR"); - errors += clblast::RunTests<clblast::TestXhpr<double2,double>, double2, double>(argc, argv, true, "ZHPR"); + errors += clblast::RunTests<clblast::TestXhpr<clblast::float2,float>, clblast::float2, float>(argc, argv, false, "CHPR"); + errors += clblast::RunTests<clblast::TestXhpr<clblast::double2,double>, clblast::double2, double>(argc, argv, true, "ZHPR"); if (errors > 0) { return 1; } else { return 
0; } } diff --git a/test/correctness/routines/level2/xhpr2.cpp b/test/correctness/routines/level2/xhpr2.cpp index 8218b444..b3bd167a 100644 --- a/test/correctness/routines/level2/xhpr2.cpp +++ b/test/correctness/routines/level2/xhpr2.cpp @@ -12,15 +12,11 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level2/xhpr2.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; - errors += clblast::RunTests<clblast::TestXhpr2<float2>, float2, float2>(argc, argv, false, "CHPR2"); - errors += clblast::RunTests<clblast::TestXhpr2<double2>, double2, double2>(argc, argv, true, "ZHPR2"); + errors += clblast::RunTests<clblast::TestXhpr2<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, false, "CHPR2"); + errors += clblast::RunTests<clblast::TestXhpr2<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZHPR2"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level2/xsbmv.cpp b/test/correctness/routines/level2/xsbmv.cpp index 7918cb21..3b6b3972 100644 --- a/test/correctness/routines/level2/xsbmv.cpp +++ b/test/correctness/routines/level2/xsbmv.cpp @@ -12,10 +12,6 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level2/xsbmv.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; diff --git a/test/correctness/routines/level2/xspmv.cpp b/test/correctness/routines/level2/xspmv.cpp index 78210660..9dccdbc1 100644 --- a/test/correctness/routines/level2/xspmv.cpp +++ b/test/correctness/routines/level2/xspmv.cpp @@ -12,10 +12,6 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level2/xspmv.hpp" -// Shortcuts to the clblast 
namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; diff --git a/test/correctness/routines/level2/xspr.cpp b/test/correctness/routines/level2/xspr.cpp index d05adf34..9cf242c1 100644 --- a/test/correctness/routines/level2/xspr.cpp +++ b/test/correctness/routines/level2/xspr.cpp @@ -12,10 +12,6 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level2/xspr.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; diff --git a/test/correctness/routines/level2/xspr2.cpp b/test/correctness/routines/level2/xspr2.cpp index caa46a09..2650bd03 100644 --- a/test/correctness/routines/level2/xspr2.cpp +++ b/test/correctness/routines/level2/xspr2.cpp @@ -12,10 +12,6 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level2/xspr2.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; diff --git a/test/correctness/routines/level2/xsymv.cpp b/test/correctness/routines/level2/xsymv.cpp index 978a5f8a..3f0a8f8b 100644 --- a/test/correctness/routines/level2/xsymv.cpp +++ b/test/correctness/routines/level2/xsymv.cpp @@ -12,10 +12,6 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level2/xsymv.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; diff --git a/test/correctness/routines/level2/xsyr.cpp b/test/correctness/routines/level2/xsyr.cpp index 244dbfb4..15ac1f14 100644 --- 
a/test/correctness/routines/level2/xsyr.cpp +++ b/test/correctness/routines/level2/xsyr.cpp @@ -12,10 +12,6 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level2/xsyr.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; diff --git a/test/correctness/routines/level2/xsyr2.cpp b/test/correctness/routines/level2/xsyr2.cpp index 422e67ad..74806219 100644 --- a/test/correctness/routines/level2/xsyr2.cpp +++ b/test/correctness/routines/level2/xsyr2.cpp @@ -12,10 +12,6 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level2/xsyr2.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; diff --git a/test/correctness/routines/level2/xtbmv.cpp b/test/correctness/routines/level2/xtbmv.cpp index 491708ec..667ae732 100644 --- a/test/correctness/routines/level2/xtbmv.cpp +++ b/test/correctness/routines/level2/xtbmv.cpp @@ -12,17 +12,13 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level2/xtbmv.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; errors += clblast::RunTests<clblast::TestXtbmv<float>, float, float>(argc, argv, false, "STBMV"); errors += clblast::RunTests<clblast::TestXtbmv<double>, double, double>(argc, argv, true, "DTBMV"); - errors += clblast::RunTests<clblast::TestXtbmv<float2>, float2, float2>(argc, argv, true, "CTBMV"); - errors += clblast::RunTests<clblast::TestXtbmv<double2>, double2, double2>(argc, argv, true, "ZTBMV"); + errors += clblast::RunTests<clblast::TestXtbmv<clblast::float2>, 
clblast::float2, clblast::float2>(argc, argv, true, "CTBMV"); + errors += clblast::RunTests<clblast::TestXtbmv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZTBMV"); errors += clblast::RunTests<clblast::TestXtbmv<half>, half, half>(argc, argv, true, "HTBMV"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level2/xtbsv.cpp b/test/correctness/routines/level2/xtbsv.cpp index 12b5dca5..5cfc6942 100644 --- a/test/correctness/routines/level2/xtbsv.cpp +++ b/test/correctness/routines/level2/xtbsv.cpp @@ -12,17 +12,13 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level2/xtbsv.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; errors += clblast::RunTests<clblast::TestXtbsv<float>, float, float>(argc, argv, false, "STBSV"); errors += clblast::RunTests<clblast::TestXtbsv<double>, double, double>(argc, argv, true, "DTBSV"); - errors += clblast::RunTests<clblast::TestXtbsv<float2>, float2, float2>(argc, argv, true, "CTBSV"); - errors += clblast::RunTests<clblast::TestXtbsv<double2>, double2, double2>(argc, argv, true, "ZTBSV"); + errors += clblast::RunTests<clblast::TestXtbsv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "CTBSV"); + errors += clblast::RunTests<clblast::TestXtbsv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZTBSV"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level2/xtpmv.cpp b/test/correctness/routines/level2/xtpmv.cpp index b89f0adc..89056678 100644 --- a/test/correctness/routines/level2/xtpmv.cpp +++ b/test/correctness/routines/level2/xtpmv.cpp @@ -12,17 +12,13 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level2/xtpmv.hpp" -// Shortcuts to the clblast namespace -using float2 = 
clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; errors += clblast::RunTests<clblast::TestXtpmv<float>, float, float>(argc, argv, false, "STPMV"); errors += clblast::RunTests<clblast::TestXtpmv<double>, double, double>(argc, argv, true, "DTPMV"); - errors += clblast::RunTests<clblast::TestXtpmv<float2>, float2, float2>(argc, argv, true, "CTPMV"); - errors += clblast::RunTests<clblast::TestXtpmv<double2>, double2, double2>(argc, argv, true, "ZTPMV"); + errors += clblast::RunTests<clblast::TestXtpmv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "CTPMV"); + errors += clblast::RunTests<clblast::TestXtpmv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZTPMV"); errors += clblast::RunTests<clblast::TestXtpmv<half>, half, half>(argc, argv, true, "HTPMV"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level2/xtpsv.cpp b/test/correctness/routines/level2/xtpsv.cpp index 6e6e7c85..28c9fe39 100644 --- a/test/correctness/routines/level2/xtpsv.cpp +++ b/test/correctness/routines/level2/xtpsv.cpp @@ -12,17 +12,13 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level2/xtpsv.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; errors += clblast::RunTests<clblast::TestXtpsv<float>, float, float>(argc, argv, false, "STPSV"); errors += clblast::RunTests<clblast::TestXtpsv<double>, double, double>(argc, argv, true, "DTPSV"); - errors += clblast::RunTests<clblast::TestXtpsv<float2>, float2, float2>(argc, argv, true, "CTPSV"); - errors += clblast::RunTests<clblast::TestXtpsv<double2>, double2, double2>(argc, argv, true, "ZTPSV"); + errors += 
clblast::RunTests<clblast::TestXtpsv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "CTPSV"); + errors += clblast::RunTests<clblast::TestXtpsv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZTPSV"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level2/xtrmv.cpp b/test/correctness/routines/level2/xtrmv.cpp index 819f5cad..b1a414af 100644 --- a/test/correctness/routines/level2/xtrmv.cpp +++ b/test/correctness/routines/level2/xtrmv.cpp @@ -12,17 +12,13 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level2/xtrmv.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; errors += clblast::RunTests<clblast::TestXtrmv<float>, float, float>(argc, argv, false, "STRMV"); errors += clblast::RunTests<clblast::TestXtrmv<double>, double, double>(argc, argv, true, "DTRMV"); - errors += clblast::RunTests<clblast::TestXtrmv<float2>, float2, float2>(argc, argv, true, "CTRMV"); - errors += clblast::RunTests<clblast::TestXtrmv<double2>, double2, double2>(argc, argv, true, "ZTRMV"); + errors += clblast::RunTests<clblast::TestXtrmv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "CTRMV"); + errors += clblast::RunTests<clblast::TestXtrmv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZTRMV"); errors += clblast::RunTests<clblast::TestXtrmv<half>, half, half>(argc, argv, true, "HTRMV"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level2/xtrsv.cpp b/test/correctness/routines/level2/xtrsv.cpp index 78e33807..b35d7fc7 100644 --- a/test/correctness/routines/level2/xtrsv.cpp +++ b/test/correctness/routines/level2/xtrsv.cpp @@ -12,17 +12,13 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level2/xtrsv.hpp" 
-// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; errors += clblast::RunTests<clblast::TestXtrsv<float>, float, float>(argc, argv, false, "STRSV"); errors += clblast::RunTests<clblast::TestXtrsv<double>, double, double>(argc, argv, true, "DTRSV"); - errors += clblast::RunTests<clblast::TestXtrsv<float2>, float2, float2>(argc, argv, true, "CTRSV"); - errors += clblast::RunTests<clblast::TestXtrsv<double2>, double2, double2>(argc, argv, true, "ZTRSV"); + errors += clblast::RunTests<clblast::TestXtrsv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "CTRSV"); + errors += clblast::RunTests<clblast::TestXtrsv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZTRSV"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level3/xgemm.cpp b/test/correctness/routines/level3/xgemm.cpp index 54d41719..7fda5f2d 100644 --- a/test/correctness/routines/level3/xgemm.cpp +++ b/test/correctness/routines/level3/xgemm.cpp @@ -12,17 +12,13 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level3/xgemm.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; errors += clblast::RunTests<clblast::TestXgemm<float>, float, float>(argc, argv, false, "SGEMM"); errors += clblast::RunTests<clblast::TestXgemm<double>, double, double>(argc, argv, true, "DGEMM"); - errors += clblast::RunTests<clblast::TestXgemm<float2>, float2, float2>(argc, argv, true, "CGEMM"); - errors += clblast::RunTests<clblast::TestXgemm<double2>, double2, double2>(argc, argv, true, "ZGEMM"); + errors += clblast::RunTests<clblast::TestXgemm<clblast::float2>, clblast::float2, 
clblast::float2>(argc, argv, true, "CGEMM"); + errors += clblast::RunTests<clblast::TestXgemm<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZGEMM"); errors += clblast::RunTests<clblast::TestXgemm<half>, half, half>(argc, argv, true, "HGEMM"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level3/xhemm.cpp b/test/correctness/routines/level3/xhemm.cpp index 76c970a7..cbd277e2 100644 --- a/test/correctness/routines/level3/xhemm.cpp +++ b/test/correctness/routines/level3/xhemm.cpp @@ -12,15 +12,11 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level3/xhemm.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; - errors += clblast::RunTests<clblast::TestXhemm<float2>, float2, float2>(argc, argv, false, "CHEMM"); - errors += clblast::RunTests<clblast::TestXhemm<double2>, double2, double2>(argc, argv, true, "ZHEMM"); + errors += clblast::RunTests<clblast::TestXhemm<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, false, "CHEMM"); + errors += clblast::RunTests<clblast::TestXhemm<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZHEMM"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level3/xher2k.cpp b/test/correctness/routines/level3/xher2k.cpp index c653265e..e21a429c 100644 --- a/test/correctness/routines/level3/xher2k.cpp +++ b/test/correctness/routines/level3/xher2k.cpp @@ -12,15 +12,11 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level3/xher2k.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; - errors += 
clblast::RunTests<clblast::TestXher2k<float2,float>, float2, float>(argc, argv, false, "CHER2K"); - errors += clblast::RunTests<clblast::TestXher2k<double2,double>, double2, double>(argc, argv, true, "ZHER2K"); + errors += clblast::RunTests<clblast::TestXher2k<clblast::float2,float>, clblast::float2, float>(argc, argv, false, "CHER2K"); + errors += clblast::RunTests<clblast::TestXher2k<clblast::double2,double>, clblast::double2, double>(argc, argv, true, "ZHER2K"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level3/xherk.cpp b/test/correctness/routines/level3/xherk.cpp index 09ea9e4d..5665147e 100644 --- a/test/correctness/routines/level3/xherk.cpp +++ b/test/correctness/routines/level3/xherk.cpp @@ -12,15 +12,11 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level3/xherk.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; - errors += clblast::RunTests<clblast::TestXherk<float2,float>, float2, float>(argc, argv, false, "CHERK"); - errors += clblast::RunTests<clblast::TestXherk<double2,double>, double2, double>(argc, argv, true, "ZHERK"); + errors += clblast::RunTests<clblast::TestXherk<clblast::float2,float>, clblast::float2, float>(argc, argv, false, "CHERK"); + errors += clblast::RunTests<clblast::TestXherk<clblast::double2,double>, clblast::double2, double>(argc, argv, true, "ZHERK"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level3/xsymm.cpp b/test/correctness/routines/level3/xsymm.cpp index 3cb3515a..3e745d24 100644 --- a/test/correctness/routines/level3/xsymm.cpp +++ b/test/correctness/routines/level3/xsymm.cpp @@ -12,17 +12,13 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level3/xsymm.hpp" -// Shortcuts to the clblast namespace -using float2 = 
clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; errors += clblast::RunTests<clblast::TestXsymm<float>, float, float>(argc, argv, false, "SSYMM"); errors += clblast::RunTests<clblast::TestXsymm<double>, double, double>(argc, argv, true, "DSYMM"); - errors += clblast::RunTests<clblast::TestXsymm<float2>, float2, float2>(argc, argv, true, "CSYMM"); - errors += clblast::RunTests<clblast::TestXsymm<double2>, double2, double2>(argc, argv, true, "ZSYMM"); + errors += clblast::RunTests<clblast::TestXsymm<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "CSYMM"); + errors += clblast::RunTests<clblast::TestXsymm<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZSYMM"); errors += clblast::RunTests<clblast::TestXsymm<half>, half, half>(argc, argv, true, "HSYMM"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level3/xsyr2k.cpp b/test/correctness/routines/level3/xsyr2k.cpp index 617af04d..b3027063 100644 --- a/test/correctness/routines/level3/xsyr2k.cpp +++ b/test/correctness/routines/level3/xsyr2k.cpp @@ -12,17 +12,13 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level3/xsyr2k.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; errors += clblast::RunTests<clblast::TestXsyr2k<float>, float, float>(argc, argv, false, "SSYR2K"); errors += clblast::RunTests<clblast::TestXsyr2k<double>, double, double>(argc, argv, true, "DSYR2K"); - errors += clblast::RunTests<clblast::TestXsyr2k<float2>, float2, float2>(argc, argv, true, "CSYR2K"); - errors += clblast::RunTests<clblast::TestXsyr2k<double2>, double2, double2>(argc, argv, true, "ZSYR2K"); + errors += 
clblast::RunTests<clblast::TestXsyr2k<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "CSYR2K"); + errors += clblast::RunTests<clblast::TestXsyr2k<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZSYR2K"); errors += clblast::RunTests<clblast::TestXsyr2k<half>, half, half>(argc, argv, true, "HSYR2K"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level3/xsyrk.cpp b/test/correctness/routines/level3/xsyrk.cpp index 2014b8d0..26c0db41 100644 --- a/test/correctness/routines/level3/xsyrk.cpp +++ b/test/correctness/routines/level3/xsyrk.cpp @@ -12,17 +12,13 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level3/xsyrk.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; errors += clblast::RunTests<clblast::TestXsyrk<float>, float, float>(argc, argv, false, "SSYRK"); errors += clblast::RunTests<clblast::TestXsyrk<double>, double, double>(argc, argv, true, "DSYRK"); - errors += clblast::RunTests<clblast::TestXsyrk<float2>, float2, float2>(argc, argv, true, "CSYRK"); - errors += clblast::RunTests<clblast::TestXsyrk<double2>, double2, double2>(argc, argv, true, "ZSYRK"); + errors += clblast::RunTests<clblast::TestXsyrk<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "CSYRK"); + errors += clblast::RunTests<clblast::TestXsyrk<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZSYRK"); errors += clblast::RunTests<clblast::TestXsyrk<half>, half, half>(argc, argv, true, "HSYRK"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level3/xtrmm.cpp b/test/correctness/routines/level3/xtrmm.cpp index 32640d52..63d17ed5 100644 --- a/test/correctness/routines/level3/xtrmm.cpp +++ b/test/correctness/routines/level3/xtrmm.cpp @@ 
-12,17 +12,13 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level3/xtrmm.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; errors += clblast::RunTests<clblast::TestXtrmm<float>, float, float>(argc, argv, false, "STRMM"); errors += clblast::RunTests<clblast::TestXtrmm<double>, double, double>(argc, argv, true, "DTRMM"); - errors += clblast::RunTests<clblast::TestXtrmm<float2>, float2, float2>(argc, argv, true, "CTRMM"); - errors += clblast::RunTests<clblast::TestXtrmm<double2>, double2, double2>(argc, argv, true, "ZTRMM"); + errors += clblast::RunTests<clblast::TestXtrmm<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "CTRMM"); + errors += clblast::RunTests<clblast::TestXtrmm<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZTRMM"); errors += clblast::RunTests<clblast::TestXtrmm<half>, half, half>(argc, argv, true, "HTRMM"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level3/xtrsm.cpp b/test/correctness/routines/level3/xtrsm.cpp index bc45a8bf..dcc20060 100644 --- a/test/correctness/routines/level3/xtrsm.cpp +++ b/test/correctness/routines/level3/xtrsm.cpp @@ -12,17 +12,13 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level3/xtrsm.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; errors += clblast::RunTests<clblast::TestXtrsm<float>, float, float>(argc, argv, false, "STRSM"); errors += clblast::RunTests<clblast::TestXtrsm<double>, double, double>(argc, argv, true, "DTRSM"); - errors += clblast::RunTests<clblast::TestXtrsm<float2>, float2, float2>(argc, argv, true, "CTRSM"); - errors += 
clblast::RunTests<clblast::TestXtrsm<double2>, double2, double2>(argc, argv, true, "ZTRSM"); + errors += clblast::RunTests<clblast::TestXtrsm<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "CTRSM"); + errors += clblast::RunTests<clblast::TestXtrsm<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZTRSM"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/levelx/xaxpybatched.cpp b/test/correctness/routines/levelx/xaxpybatched.cpp index a106440f..3b906217 100644 --- a/test/correctness/routines/levelx/xaxpybatched.cpp +++ b/test/correctness/routines/levelx/xaxpybatched.cpp @@ -12,17 +12,13 @@ #include "test/correctness/testblas.hpp" #include "test/routines/levelx/xaxpybatched.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; errors += clblast::RunTests<clblast::TestXaxpyBatched<float>, float, float>(argc, argv, false, "SAXPYBATCHED"); errors += clblast::RunTests<clblast::TestXaxpyBatched<double>, double, double>(argc, argv, true, "DAXPYBATCHED"); - errors += clblast::RunTests<clblast::TestXaxpyBatched<float2>, float2, float2>(argc, argv, true, "CAXPYBATCHED"); - errors += clblast::RunTests<clblast::TestXaxpyBatched<double2>, double2, double2>(argc, argv, true, "ZAXPYBATCHED"); + errors += clblast::RunTests<clblast::TestXaxpyBatched<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "CAXPYBATCHED"); + errors += clblast::RunTests<clblast::TestXaxpyBatched<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZAXPYBATCHED"); errors += clblast::RunTests<clblast::TestXaxpyBatched<half>, half, half>(argc, argv, true, "HAXPYBATCHED"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/levelx/xgemmbatched.cpp 
b/test/correctness/routines/levelx/xgemmbatched.cpp index 748e1bb7..1e931fd5 100644 --- a/test/correctness/routines/levelx/xgemmbatched.cpp +++ b/test/correctness/routines/levelx/xgemmbatched.cpp @@ -12,17 +12,13 @@ #include "test/correctness/testblas.hpp" #include "test/routines/levelx/xgemmbatched.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; errors += clblast::RunTests<clblast::TestXgemmBatched<float>, float, float>(argc, argv, false, "SGEMMBATCHED"); errors += clblast::RunTests<clblast::TestXgemmBatched<double>, double, double>(argc, argv, true, "DGEMMBATCHED"); - errors += clblast::RunTests<clblast::TestXgemmBatched<float2>, float2, float2>(argc, argv, true, "CGEMMBATCHED"); - errors += clblast::RunTests<clblast::TestXgemmBatched<double2>, double2, double2>(argc, argv, true, "ZGEMMBATCHED"); + errors += clblast::RunTests<clblast::TestXgemmBatched<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "CGEMMBATCHED"); + errors += clblast::RunTests<clblast::TestXgemmBatched<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZGEMMBATCHED"); errors += clblast::RunTests<clblast::TestXgemmBatched<half>, half, half>(argc, argv, true, "HGEMMBATCHED"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/levelx/xomatcopy.cpp b/test/correctness/routines/levelx/xomatcopy.cpp index e034bc18..f512432b 100644 --- a/test/correctness/routines/levelx/xomatcopy.cpp +++ b/test/correctness/routines/levelx/xomatcopy.cpp @@ -12,17 +12,13 @@ #include "test/correctness/testblas.hpp" #include "test/routines/levelx/xomatcopy.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = 
size_t{0}; errors += clblast::RunTests<clblast::TestXomatcopy<float>, float, float>(argc, argv, false, "SOMATCOPY"); errors += clblast::RunTests<clblast::TestXomatcopy<double>, double, double>(argc, argv, true, "DOMATCOPY"); - errors += clblast::RunTests<clblast::TestXomatcopy<float2>, float2, float2>(argc, argv, true, "COMATCOPY"); - errors += clblast::RunTests<clblast::TestXomatcopy<double2>, double2, double2>(argc, argv, true, "ZOMATCOPY"); + errors += clblast::RunTests<clblast::TestXomatcopy<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "COMATCOPY"); + errors += clblast::RunTests<clblast::TestXomatcopy<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZOMATCOPY"); errors += clblast::RunTests<clblast::TestXomatcopy<half>, half, half>(argc, argv, true, "HOMATCOPY"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/tester.cpp b/test/correctness/tester.cpp index 40784fdb..b352c1aa 100644 --- a/test/correctness/tester.cpp +++ b/test/correctness/tester.cpp @@ -116,24 +116,38 @@ Tester<T,U>::Tester(const std::vector<std::string> &arguments, const bool silent tests_failed_{0} { options_ = options; + // Determines which reference is the default + auto default_clblas = 0; + auto default_cblas = 0; + auto default_cublas = 0; + #if defined(CLBLAST_REF_CBLAS) + default_cblas = 1; + #elif defined(CLBLAST_REF_CLBLAS) + default_clblas = 1; + #elif defined(CLBLAST_REF_CUBLAS) + default_cublas = 1; + #endif + // Determines which reference to test against - #if defined(CLBLAST_REF_CLBLAS) && defined(CLBLAST_REF_CBLAS) - compare_clblas_ = GetArgument(arguments, help_, kArgCompareclblas, 0); - compare_cblas_ = GetArgument(arguments, help_, kArgComparecblas, 1); - #elif CLBLAST_REF_CLBLAS - compare_clblas_ = GetArgument(arguments, help_, kArgCompareclblas, 1); - compare_cblas_ = 0; - #elif CLBLAST_REF_CBLAS - compare_clblas_ = 0; - compare_cblas_ = GetArgument(arguments, help_, kArgComparecblas, 1); - 
#else - compare_clblas_ = 0; - compare_cblas_ = 0; + compare_clblas_ = 0; + compare_cblas_ = 0; + compare_cublas_ = 0; + #if defined(CLBLAST_REF_CBLAS) + compare_cblas_ = GetArgument(arguments, help_, kArgComparecblas, default_cblas); + #endif + #if defined(CLBLAST_REF_CLBLAS) + compare_clblas_ = GetArgument(arguments, help_, kArgCompareclblas, default_clblas); + #endif + #if defined(CLBLAST_REF_CUBLAS) + compare_cublas_ = GetArgument(arguments, help_, kArgComparecublas, default_cublas); #endif // Prints the help message (command-line arguments) if (!silent) { fprintf(stdout, "\n* %s\n", help_.c_str()); } + // Support for cuBLAS not available yet + if (compare_cublas_) { throw std::runtime_error("Cannot test against cuBLAS; not implemented yet"); } + // Can only test against a single reference (not two, not zero) if (compare_clblas_ && compare_cblas_) { throw std::runtime_error("Cannot test against both clBLAS and CBLAS references; choose one using the -cblas and -clblas arguments"); diff --git a/test/correctness/tester.hpp b/test/correctness/tester.hpp index f60be04b..8cfa702f 100644 --- a/test/correctness/tester.hpp +++ b/test/correctness/tester.hpp @@ -113,6 +113,7 @@ class Tester { // Testing against reference implementations int compare_cblas_; int compare_clblas_; + int compare_cublas_; private: diff --git a/test/performance/client.cpp b/test/performance/client.cpp index 48d6708e..a2f0f9f4 100644 --- a/test/performance/client.cpp +++ b/test/performance/client.cpp @@ -30,13 +30,14 @@ template <typename T, typename U> const int Client<T,U>::kSeed = 42; // fixed se template <typename T, typename U> Client<T,U>::Client(const Routine run_routine, const Reference1 run_reference1, const Reference2 run_reference2, - const std::vector<std::string> &options, + const Reference3 run_reference3, const std::vector<std::string> &options, const std::vector<std::string> &buffers_in, const std::vector<std::string> &buffers_out, const GetMetric get_flops, const GetMetric 
get_bytes): run_routine_(run_routine), run_reference1_(run_reference1), run_reference2_(run_reference2), + run_reference3_(run_reference3), options_(options), buffers_in_(buffers_in), buffers_out_(buffers_out), @@ -119,6 +120,11 @@ Arguments<U> Client<T,U>::ParseArguments(int argc, char *argv[], const size_t le #else args.compare_cblas = 0; #endif + #ifdef CLBLAST_REF_CUBLAS + args.compare_cublas = GetArgument(command_line_args, help, kArgComparecublas, 1); + #else + args.compare_cublas = 0; + #endif args.step = GetArgument(command_line_args, help, kArgStepSize, size_t{1}); args.num_steps = GetArgument(command_line_args, help, kArgNumSteps, size_t{0}); args.num_runs = GetArgument(command_line_args, help, kArgNumRuns, size_t{10}); @@ -133,24 +139,26 @@ Arguments<U> Client<T,U>::ParseArguments(int argc, char *argv[], const size_t le // Comparison against a non-BLAS routine is not supported if (level == 4) { // level-4 == level-X - if (args.compare_clblas != 0 || args.compare_cblas != 0) { + if (args.compare_clblas != 0 || args.compare_cblas != 0 || args.compare_cublas != 0) { if (!args.silent) { - fprintf(stdout, "* Disabling clBLAS and CPU BLAS comparisons for this non-BLAS routine\n\n"); + fprintf(stdout, "* Disabling clBLAS/CBLAS/cuBLAS comparisons for this non-BLAS routine\n\n"); } } args.compare_clblas = 0; args.compare_cblas = 0; + args.compare_cublas = 0; } - // Comparison against clBLAS or a CPU BLAS library is not supported in case of half-precision + // Comparison against other BLAS libraries is not supported in case of half-precision if (args.precision == Precision::kHalf) { - if (args.compare_clblas != 0 || args.compare_cblas != 0) { + if (args.compare_clblas != 0 || args.compare_cblas != 0 || args.compare_cublas != 0) { if (!args.silent) { - fprintf(stdout, "* Disabling clBLAS and CPU BLAS comparisons for half-precision\n\n"); + fprintf(stdout, "* Disabling clBLAS/CBLAS/cuBLAS comparisons for half-precision\n\n"); } } args.compare_clblas = 0; 
args.compare_cblas = 0; + args.compare_cublas = 0; } // Returns the arguments @@ -174,6 +182,9 @@ void Client<T,U>::PerformanceTest(Arguments<U> &args, const SetMetric set_sizes) #ifdef CLBLAST_REF_CLBLAS if (args.compare_clblas) { clblasSetup(); } #endif + #ifdef CLBLAST_REF_CUBLAS + cudaSetDevice(static_cast<int>(args.device_id)); + #endif // Iterates over all "num_step" values jumping by "step" each time auto s = size_t{0}; @@ -232,6 +243,16 @@ void Client<T,U>::PerformanceTest(Arguments<U> &args, const SetMetric set_sizes) HostToDevice(args, buffers, buffers_host, queue, buffers_out_); timings.push_back(std::pair<std::string, double>("CPU BLAS", ms_cblas)); } + if (args.compare_cublas) { + auto buffers_host = BuffersHost<T>(); + auto buffers_cuda = BuffersCUDA<T>(); + DeviceToHost(args, buffers, buffers_host, queue, buffers_in_); + HostToCUDA(args, buffers_cuda, buffers_host, buffers_in_); + auto ms_cublas = TimedExecution(args.num_runs, args, buffers_cuda, queue, run_reference3_, "cuBLAS"); + CUDAToHost(args, buffers_cuda, buffers_host, buffers_out_); + HostToDevice(args, buffers, buffers_host, queue, buffers_out_); + timings.push_back(std::pair<std::string, double>("cuBLAS", ms_cublas)); + } // Prints the performance of the tested libraries PrintTableRow(args, timings); @@ -307,6 +328,7 @@ void Client<T,U>::PrintTableHeader(const Arguments<U>& args) { fprintf(stdout, " | <-- CLBlast -->"); if (args.compare_clblas) { fprintf(stdout, " | <-- clBLAS -->"); } if (args.compare_cblas) { fprintf(stdout, " | <-- CPU BLAS -->"); } + if (args.compare_cublas) { fprintf(stdout, " | <-- cuBLAS -->"); } fprintf(stdout, " |\n"); } @@ -315,6 +337,7 @@ void Client<T,U>::PrintTableHeader(const Arguments<U>& args) { fprintf(stdout, "%9s;%9s;%9s", "ms_1", "GFLOPS_1", "GBs_1"); if (args.compare_clblas) { fprintf(stdout, ";%9s;%9s;%9s", "ms_2", "GFLOPS_2", "GBs_2"); } if (args.compare_cblas) { fprintf(stdout, ";%9s;%9s;%9s", "ms_3", "GFLOPS_3", "GBs_3"); } + if 
(args.compare_cublas) { fprintf(stdout, ";%9s;%9s;%9s", "ms_4", "GFLOPS_4", "GBs_4"); } fprintf(stdout, "\n"); } diff --git a/test/performance/client.hpp b/test/performance/client.hpp index 12fd113d..47a13017 100644 --- a/test/performance/client.hpp +++ b/test/performance/client.hpp @@ -31,6 +31,7 @@ #ifdef CLBLAST_REF_CLBLAS #include <clBLAS.h> #endif +#include "test/wrapper_cuda.hpp" #include "clblast.h" namespace clblast { @@ -46,12 +47,13 @@ class Client { using Routine = std::function<StatusCode(const Arguments<U>&, Buffers<T>&, Queue&)>; using Reference1 = std::function<StatusCode(const Arguments<U>&, Buffers<T>&, Queue&)>; using Reference2 = std::function<StatusCode(const Arguments<U>&, BuffersHost<T>&, Queue&)>; + using Reference3 = std::function<StatusCode(const Arguments<U>&, BuffersCUDA<T>&, Queue&)>; using SetMetric = std::function<void(Arguments<U>&)>; using GetMetric = std::function<size_t(const Arguments<U>&)>; // The constructor Client(const Routine run_routine, const Reference1 run_reference1, const Reference2 run_reference2, - const std::vector<std::string> &options, + const Reference3 run_reference3, const std::vector<std::string> &options, const std::vector<std::string> &buffers_in, const std::vector<std::string> &buffers_out, const GetMetric get_flops, const GetMetric get_bytes); @@ -84,6 +86,7 @@ class Client { const Routine run_routine_; const Reference1 run_reference1_; const Reference2 run_reference2_; + const Reference3 run_reference3_; const std::vector<std::string> options_; const std::vector<std::string> buffers_in_; const std::vector<std::string> buffers_out_; @@ -118,9 +121,14 @@ void RunClient(int argc, char *argv[]) { #else auto reference2 = ReferenceNotAvailable<T,U,BuffersHost<T>>; #endif + #ifdef CLBLAST_REF_CUBLAS + auto reference3 = C::RunReference3; // cuBLAS when available + #else + auto reference3 = ReferenceNotAvailable<T,U,BuffersCUDA<T>>; + #endif // Creates a new client - auto client = Client<T,U>(C::RunRoutine, 
reference1, reference2, C::GetOptions(), + auto client = Client<T,U>(C::RunRoutine, reference1, reference2, reference3, C::GetOptions(), C::BuffersIn(), C::BuffersOut(), C::GetFlops, C::GetBytes); // Simple command line argument parser with defaults diff --git a/test/performance/routines/level1/xamax.cpp b/test/performance/routines/level1/xamax.cpp index 5dc7b3d9..5cbef604 100644 --- a/test/performance/routines/level1/xamax.cpp +++ b/test/performance/routines/level1/xamax.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level1/xamax.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -27,9 +23,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kDouble: clblast::RunClient<clblast::TestXamax<double>, double, double>(argc, argv); break; case clblast::Precision::kComplexSingle: - clblast::RunClient<clblast::TestXamax<float2>, float2, float2>(argc, argv); break; + clblast::RunClient<clblast::TestXamax<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient<clblast::TestXamax<double2>, double2, double2>(argc, argv); break; + clblast::RunClient<clblast::TestXamax<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level1/xasum.cpp b/test/performance/routines/level1/xasum.cpp index bf5b2fa9..7fccb678 100644 --- a/test/performance/routines/level1/xasum.cpp +++ b/test/performance/routines/level1/xasum.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level1/xasum.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the 
clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -27,9 +23,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kDouble: clblast::RunClient<clblast::TestXasum<double>, double, double>(argc, argv); break; case clblast::Precision::kComplexSingle: - clblast::RunClient<clblast::TestXasum<float2>, float2, float2>(argc, argv); break; + clblast::RunClient<clblast::TestXasum<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient<clblast::TestXasum<double2>, double2, double2>(argc, argv); break; + clblast::RunClient<clblast::TestXasum<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level1/xaxpy.cpp b/test/performance/routines/level1/xaxpy.cpp index faccc089..739408bb 100644 --- a/test/performance/routines/level1/xaxpy.cpp +++ b/test/performance/routines/level1/xaxpy.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level1/xaxpy.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -27,9 +23,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kDouble: clblast::RunClient<clblast::TestXaxpy<double>, double, double>(argc, argv); break; case clblast::Precision::kComplexSingle: - clblast::RunClient<clblast::TestXaxpy<float2>, float2, float2>(argc, argv); break; + clblast::RunClient<clblast::TestXaxpy<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient<clblast::TestXaxpy<double2>, double2, double2>(argc, argv); break; + 
clblast::RunClient<clblast::TestXaxpy<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level1/xcopy.cpp b/test/performance/routines/level1/xcopy.cpp index 8aa536af..902c394f 100644 --- a/test/performance/routines/level1/xcopy.cpp +++ b/test/performance/routines/level1/xcopy.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level1/xcopy.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -27,9 +23,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kDouble: clblast::RunClient<clblast::TestXcopy<double>, double, double>(argc, argv); break; case clblast::Precision::kComplexSingle: - clblast::RunClient<clblast::TestXcopy<float2>, float2, float2>(argc, argv); break; + clblast::RunClient<clblast::TestXcopy<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient<clblast::TestXcopy<double2>, double2, double2>(argc, argv); break; + clblast::RunClient<clblast::TestXcopy<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level1/xdot.cpp b/test/performance/routines/level1/xdot.cpp index 9a570e1e..b2d4d969 100644 --- a/test/performance/routines/level1/xdot.cpp +++ b/test/performance/routines/level1/xdot.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level1/xdot.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, 
argv); diff --git a/test/performance/routines/level1/xdotc.cpp b/test/performance/routines/level1/xdotc.cpp index 426b81ae..308bcdab 100644 --- a/test/performance/routines/level1/xdotc.cpp +++ b/test/performance/routines/level1/xdotc.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level1/xdotc.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -24,9 +20,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kComplexSingle: - clblast::RunClient<clblast::TestXdotc<float2>, float2, float2>(argc, argv); break; + clblast::RunClient<clblast::TestXdotc<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient<clblast::TestXdotc<double2>, double2, double2>(argc, argv); break; + clblast::RunClient<clblast::TestXdotc<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level1/xdotu.cpp b/test/performance/routines/level1/xdotu.cpp index 4fbe167d..fc54a8a5 100644 --- a/test/performance/routines/level1/xdotu.cpp +++ b/test/performance/routines/level1/xdotu.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level1/xdotu.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -24,9 +20,9 @@ int main(int argc, char 
*argv[]) { case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kComplexSingle: - clblast::RunClient<clblast::TestXdotu<float2>, float2, float2>(argc, argv); break; + clblast::RunClient<clblast::TestXdotu<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient<clblast::TestXdotu<double2>, double2, double2>(argc, argv); break; + clblast::RunClient<clblast::TestXdotu<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level1/xnrm2.cpp b/test/performance/routines/level1/xnrm2.cpp index 6a1cdcc7..769335eb 100644 --- a/test/performance/routines/level1/xnrm2.cpp +++ b/test/performance/routines/level1/xnrm2.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level1/xnrm2.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -27,9 +23,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kDouble: clblast::RunClient<clblast::TestXnrm2<double>, double, double>(argc, argv); break; case clblast::Precision::kComplexSingle: - clblast::RunClient<clblast::TestXnrm2<float2>, float2, float2>(argc, argv); break; + clblast::RunClient<clblast::TestXnrm2<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient<clblast::TestXnrm2<double2>, double2, double2>(argc, argv); break; + clblast::RunClient<clblast::TestXnrm2<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git 
a/test/performance/routines/level1/xrot.cpp b/test/performance/routines/level1/xrot.cpp index 2b94ca39..f010e04a 100644 --- a/test/performance/routines/level1/xrot.cpp +++ b/test/performance/routines/level1/xrot.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level1/xrot.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); diff --git a/test/performance/routines/level1/xrotg.cpp b/test/performance/routines/level1/xrotg.cpp index ee6fc44b..4c8d33cf 100644 --- a/test/performance/routines/level1/xrotg.cpp +++ b/test/performance/routines/level1/xrotg.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level1/xrotg.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); diff --git a/test/performance/routines/level1/xrotm.cpp b/test/performance/routines/level1/xrotm.cpp index e8d73311..bc2111b3 100644 --- a/test/performance/routines/level1/xrotm.cpp +++ b/test/performance/routines/level1/xrotm.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level1/xrotm.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); diff --git a/test/performance/routines/level1/xrotmg.cpp b/test/performance/routines/level1/xrotmg.cpp index a5266b14..fb568243 100644 --- a/test/performance/routines/level1/xrotmg.cpp +++ 
b/test/performance/routines/level1/xrotmg.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level1/xrotmg.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); diff --git a/test/performance/routines/level1/xscal.cpp b/test/performance/routines/level1/xscal.cpp index 6fefc5d0..b9db60cf 100644 --- a/test/performance/routines/level1/xscal.cpp +++ b/test/performance/routines/level1/xscal.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level1/xscal.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -27,9 +23,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kDouble: clblast::RunClient<clblast::TestXscal<double>, double, double>(argc, argv); break; case clblast::Precision::kComplexSingle: - clblast::RunClient<clblast::TestXscal<float2>, float2, float2>(argc, argv); break; + clblast::RunClient<clblast::TestXscal<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient<clblast::TestXscal<double2>, double2, double2>(argc, argv); break; + clblast::RunClient<clblast::TestXscal<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level1/xswap.cpp b/test/performance/routines/level1/xswap.cpp index b728b8f4..db40f6e4 100644 --- a/test/performance/routines/level1/xswap.cpp +++ b/test/performance/routines/level1/xswap.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include 
"test/routines/level1/xswap.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -27,9 +23,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kDouble: clblast::RunClient<clblast::TestXswap<double>, double, double>(argc, argv); break; case clblast::Precision::kComplexSingle: - clblast::RunClient<clblast::TestXswap<float2>, float2, float2>(argc, argv); break; + clblast::RunClient<clblast::TestXswap<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient<clblast::TestXswap<double2>, double2, double2>(argc, argv); break; + clblast::RunClient<clblast::TestXswap<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level2/xgbmv.cpp b/test/performance/routines/level2/xgbmv.cpp index 6a4b01f8..23a91503 100644 --- a/test/performance/routines/level2/xgbmv.cpp +++ b/test/performance/routines/level2/xgbmv.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level2/xgbmv.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -27,9 +23,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kDouble: clblast::RunClient<clblast::TestXgbmv<double>, double, double>(argc, argv); break; case clblast::Precision::kComplexSingle: - clblast::RunClient<clblast::TestXgbmv<float2>, float2, float2>(argc, argv); break; + clblast::RunClient<clblast::TestXgbmv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break; case 
clblast::Precision::kComplexDouble: - clblast::RunClient<clblast::TestXgbmv<double2>, double2, double2>(argc, argv); break; + clblast::RunClient<clblast::TestXgbmv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level2/xgemv.cpp b/test/performance/routines/level2/xgemv.cpp index 335d5ef1..3bb14b68 100644 --- a/test/performance/routines/level2/xgemv.cpp +++ b/test/performance/routines/level2/xgemv.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level2/xgemv.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -27,9 +23,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kDouble: clblast::RunClient<clblast::TestXgemv<double>, double, double>(argc, argv); break; case clblast::Precision::kComplexSingle: - clblast::RunClient<clblast::TestXgemv<float2>, float2, float2>(argc, argv); break; + clblast::RunClient<clblast::TestXgemv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient<clblast::TestXgemv<double2>, double2, double2>(argc, argv); break; + clblast::RunClient<clblast::TestXgemv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level2/xger.cpp b/test/performance/routines/level2/xger.cpp index 50fdb9e6..ca23b8f0 100644 --- a/test/performance/routines/level2/xger.cpp +++ b/test/performance/routines/level2/xger.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level2/xger.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the 
clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); diff --git a/test/performance/routines/level2/xgerc.cpp b/test/performance/routines/level2/xgerc.cpp index 67c72285..0423cdd5 100644 --- a/test/performance/routines/level2/xgerc.cpp +++ b/test/performance/routines/level2/xgerc.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level2/xgerc.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -24,9 +20,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kComplexSingle: - clblast::RunClient<clblast::TestXgerc<float2>, float2, float2>(argc, argv); break; + clblast::RunClient<clblast::TestXgerc<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient<clblast::TestXgerc<double2>, double2, double2>(argc, argv); break; + clblast::RunClient<clblast::TestXgerc<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level2/xgeru.cpp b/test/performance/routines/level2/xgeru.cpp index 6e845bb8..c0fbb2d5 100644 --- a/test/performance/routines/level2/xgeru.cpp +++ b/test/performance/routines/level2/xgeru.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level2/xgeru.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) 
{ const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -24,9 +20,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kComplexSingle: - clblast::RunClient<clblast::TestXgeru<float2>, float2, float2>(argc, argv); break; + clblast::RunClient<clblast::TestXgeru<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient<clblast::TestXgeru<double2>, double2, double2>(argc, argv); break; + clblast::RunClient<clblast::TestXgeru<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level2/xhbmv.cpp b/test/performance/routines/level2/xhbmv.cpp index 600317c1..d59cba26 100644 --- a/test/performance/routines/level2/xhbmv.cpp +++ b/test/performance/routines/level2/xhbmv.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level2/xhbmv.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -24,9 +20,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kComplexSingle: - clblast::RunClient<clblast::TestXhbmv<float2>, float2, float2>(argc, argv); break; + clblast::RunClient<clblast::TestXhbmv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient<clblast::TestXhbmv<double2>, double2, 
double2>(argc, argv); break; + clblast::RunClient<clblast::TestXhbmv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level2/xhemv.cpp b/test/performance/routines/level2/xhemv.cpp index 7700cf7b..1664b6cd 100644 --- a/test/performance/routines/level2/xhemv.cpp +++ b/test/performance/routines/level2/xhemv.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level2/xhemv.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -24,9 +20,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kComplexSingle: - clblast::RunClient<clblast::TestXhemv<float2>, float2, float2>(argc, argv); break; + clblast::RunClient<clblast::TestXhemv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient<clblast::TestXhemv<double2>, double2, double2>(argc, argv); break; + clblast::RunClient<clblast::TestXhemv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level2/xher.cpp b/test/performance/routines/level2/xher.cpp index e7276aee..434f486c 100644 --- a/test/performance/routines/level2/xher.cpp +++ b/test/performance/routines/level2/xher.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level2/xher.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int 
argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -24,9 +20,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kComplexSingle: - clblast::RunClient<clblast::TestXher<float2,float>, float2, float>(argc, argv); break; + clblast::RunClient<clblast::TestXher<clblast::float2,float>, clblast::float2, float>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient<clblast::TestXher<double2,double>, double2, double>(argc, argv); break; + clblast::RunClient<clblast::TestXher<clblast::double2,double>, clblast::double2, double>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level2/xher2.cpp b/test/performance/routines/level2/xher2.cpp index b4c53206..cce40a9e 100644 --- a/test/performance/routines/level2/xher2.cpp +++ b/test/performance/routines/level2/xher2.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level2/xher2.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -24,9 +20,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kComplexSingle: - clblast::RunClient<clblast::TestXher2<float2>, float2, float2>(argc, argv); break; + clblast::RunClient<clblast::TestXher2<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - 
clblast::RunClient<clblast::TestXher2<double2>, double2, double2>(argc, argv); break; + clblast::RunClient<clblast::TestXher2<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level2/xhpmv.cpp b/test/performance/routines/level2/xhpmv.cpp index d9683d2e..d88791fe 100644 --- a/test/performance/routines/level2/xhpmv.cpp +++ b/test/performance/routines/level2/xhpmv.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level2/xhpmv.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -24,9 +20,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kComplexSingle: - clblast::RunClient<clblast::TestXhpmv<float2>, float2, float2>(argc, argv); break; + clblast::RunClient<clblast::TestXhpmv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient<clblast::TestXhpmv<double2>, double2, double2>(argc, argv); break; + clblast::RunClient<clblast::TestXhpmv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level2/xhpr.cpp b/test/performance/routines/level2/xhpr.cpp index c4ffaf81..a92a3134 100644 --- a/test/performance/routines/level2/xhpr.cpp +++ b/test/performance/routines/level2/xhpr.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level2/xhpr.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main 
function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -24,9 +20,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kComplexSingle: - clblast::RunClient<clblast::TestXhpr<float2,float>, float2, float>(argc, argv); break; + clblast::RunClient<clblast::TestXhpr<clblast::float2,float>, clblast::float2, float>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient<clblast::TestXhpr<double2,double>, double2, double>(argc, argv); break; + clblast::RunClient<clblast::TestXhpr<clblast::double2,double>, clblast::double2, double>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level2/xhpr2.cpp b/test/performance/routines/level2/xhpr2.cpp index 3e5d4004..f34de29b 100644 --- a/test/performance/routines/level2/xhpr2.cpp +++ b/test/performance/routines/level2/xhpr2.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level2/xhpr2.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -24,9 +20,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kComplexSingle: - clblast::RunClient<clblast::TestXhpr2<float2>, float2, float2>(argc, argv); break; + clblast::RunClient<clblast::TestXhpr2<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break; case 
clblast::Precision::kComplexDouble: - clblast::RunClient<clblast::TestXhpr2<double2>, double2, double2>(argc, argv); break; + clblast::RunClient<clblast::TestXhpr2<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level2/xsbmv.cpp b/test/performance/routines/level2/xsbmv.cpp index 9c0ab3b6..59bbf40c 100644 --- a/test/performance/routines/level2/xsbmv.cpp +++ b/test/performance/routines/level2/xsbmv.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level2/xsbmv.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); diff --git a/test/performance/routines/level2/xspmv.cpp b/test/performance/routines/level2/xspmv.cpp index 6cc4e3ba..9ba29f43 100644 --- a/test/performance/routines/level2/xspmv.cpp +++ b/test/performance/routines/level2/xspmv.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level2/xspmv.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); diff --git a/test/performance/routines/level2/xspr.cpp b/test/performance/routines/level2/xspr.cpp index dc45ba6d..57551f5d 100644 --- a/test/performance/routines/level2/xspr.cpp +++ b/test/performance/routines/level2/xspr.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level2/xspr.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const 
auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); diff --git a/test/performance/routines/level2/xspr2.cpp b/test/performance/routines/level2/xspr2.cpp index 3c9a769f..573fb652 100644 --- a/test/performance/routines/level2/xspr2.cpp +++ b/test/performance/routines/level2/xspr2.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level2/xspr2.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); diff --git a/test/performance/routines/level2/xsymv.cpp b/test/performance/routines/level2/xsymv.cpp index aaa98c8b..25933d8d 100644 --- a/test/performance/routines/level2/xsymv.cpp +++ b/test/performance/routines/level2/xsymv.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level2/xsymv.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); diff --git a/test/performance/routines/level2/xsyr.cpp b/test/performance/routines/level2/xsyr.cpp index d710bf63..3b54510d 100644 --- a/test/performance/routines/level2/xsyr.cpp +++ b/test/performance/routines/level2/xsyr.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level2/xsyr.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); diff --git a/test/performance/routines/level2/xsyr2.cpp b/test/performance/routines/level2/xsyr2.cpp 
index 39b46b6a..ab9641c2 100644 --- a/test/performance/routines/level2/xsyr2.cpp +++ b/test/performance/routines/level2/xsyr2.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level2/xsyr2.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); diff --git a/test/performance/routines/level2/xtbmv.cpp b/test/performance/routines/level2/xtbmv.cpp index 5fb3ea14..319f9c80 100644 --- a/test/performance/routines/level2/xtbmv.cpp +++ b/test/performance/routines/level2/xtbmv.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level2/xtbmv.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -27,9 +23,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kDouble: clblast::RunClient<clblast::TestXtbmv<double>, double, double>(argc, argv); break; case clblast::Precision::kComplexSingle: - clblast::RunClient<clblast::TestXtbmv<float2>, float2, float2>(argc, argv); break; + clblast::RunClient<clblast::TestXtbmv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient<clblast::TestXtbmv<double2>, double2, double2>(argc, argv); break; + clblast::RunClient<clblast::TestXtbmv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level2/xtbsv.cpp b/test/performance/routines/level2/xtbsv.cpp index 7b88917c..4d37e76d 100644 --- a/test/performance/routines/level2/xtbsv.cpp +++ 
b/test/performance/routines/level2/xtbsv.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level2/xtbsv.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -26,9 +22,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kDouble: clblast::RunClient<clblast::TestXtbsv<double>, double, double>(argc, argv); break; case clblast::Precision::kComplexSingle: - clblast::RunClient<clblast::TestXtbsv<float2>, float2, float2>(argc, argv); break; + clblast::RunClient<clblast::TestXtbsv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient<clblast::TestXtbsv<double2>, double2, double2>(argc, argv); break; + clblast::RunClient<clblast::TestXtbsv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level2/xtpmv.cpp b/test/performance/routines/level2/xtpmv.cpp index 907749a7..c2db51b1 100644 --- a/test/performance/routines/level2/xtpmv.cpp +++ b/test/performance/routines/level2/xtpmv.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level2/xtpmv.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -27,9 +23,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kDouble: clblast::RunClient<clblast::TestXtpmv<double>, double, double>(argc, argv); break; case clblast::Precision::kComplexSingle: - clblast::RunClient<clblast::TestXtpmv<float2>, float2, float2>(argc, argv); break; + 
clblast::RunClient<clblast::TestXtpmv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient<clblast::TestXtpmv<double2>, double2, double2>(argc, argv); break; + clblast::RunClient<clblast::TestXtpmv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level2/xtpsv.cpp b/test/performance/routines/level2/xtpsv.cpp index 0dab8ff6..b01a9f05 100644 --- a/test/performance/routines/level2/xtpsv.cpp +++ b/test/performance/routines/level2/xtpsv.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level2/xtpsv.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -26,9 +22,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kDouble: clblast::RunClient<clblast::TestXtpsv<double>, double, double>(argc, argv); break; case clblast::Precision::kComplexSingle: - clblast::RunClient<clblast::TestXtpsv<float2>, float2, float2>(argc, argv); break; + clblast::RunClient<clblast::TestXtpsv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient<clblast::TestXtpsv<double2>, double2, double2>(argc, argv); break; + clblast::RunClient<clblast::TestXtpsv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level2/xtrmv.cpp b/test/performance/routines/level2/xtrmv.cpp index c2c6f232..610a5052 100644 --- a/test/performance/routines/level2/xtrmv.cpp +++ b/test/performance/routines/level2/xtrmv.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level2/xtrmv.hpp" -// Shortcuts to the 
clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -27,9 +23,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kDouble: clblast::RunClient<clblast::TestXtrmv<double>, double, double>(argc, argv); break; case clblast::Precision::kComplexSingle: - clblast::RunClient<clblast::TestXtrmv<float2>, float2, float2>(argc, argv); break; + clblast::RunClient<clblast::TestXtrmv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient<clblast::TestXtrmv<double2>, double2, double2>(argc, argv); break; + clblast::RunClient<clblast::TestXtrmv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level2/xtrsv.cpp b/test/performance/routines/level2/xtrsv.cpp index 49e477f7..02255e71 100644 --- a/test/performance/routines/level2/xtrsv.cpp +++ b/test/performance/routines/level2/xtrsv.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level2/xtrsv.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -26,9 +22,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kDouble: clblast::RunClient<clblast::TestXtrsv<double>, double, double>(argc, argv); break; case clblast::Precision::kComplexSingle: - clblast::RunClient<clblast::TestXtrsv<float2>, float2, float2>(argc, argv); break; + clblast::RunClient<clblast::TestXtrsv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - 
clblast::RunClient<clblast::TestXtrsv<double2>, double2, double2>(argc, argv); break; + clblast::RunClient<clblast::TestXtrsv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level3/xgemm.cpp b/test/performance/routines/level3/xgemm.cpp index deb2493f..602e1a20 100644 --- a/test/performance/routines/level3/xgemm.cpp +++ b/test/performance/routines/level3/xgemm.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level3/xgemm.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -27,9 +23,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kDouble: clblast::RunClient<clblast::TestXgemm<double>, double, double>(argc, argv); break; case clblast::Precision::kComplexSingle: - clblast::RunClient<clblast::TestXgemm<float2>, float2, float2>(argc, argv); break; + clblast::RunClient<clblast::TestXgemm<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient<clblast::TestXgemm<double2>, double2, double2>(argc, argv); break; + clblast::RunClient<clblast::TestXgemm<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level3/xhemm.cpp b/test/performance/routines/level3/xhemm.cpp index 975c672f..6c3687a9 100644 --- a/test/performance/routines/level3/xhemm.cpp +++ b/test/performance/routines/level3/xhemm.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level3/xhemm.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int 
argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -24,9 +20,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kComplexSingle: - clblast::RunClient<clblast::TestXhemm<float2>, float2, float2>(argc, argv); break; + clblast::RunClient<clblast::TestXhemm<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient<clblast::TestXhemm<double2>, double2, double2>(argc, argv); break; + clblast::RunClient<clblast::TestXhemm<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level3/xher2k.cpp b/test/performance/routines/level3/xher2k.cpp index d579d4f9..9d3385f7 100644 --- a/test/performance/routines/level3/xher2k.cpp +++ b/test/performance/routines/level3/xher2k.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level3/xher2k.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -24,9 +20,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kComplexSingle: - clblast::RunClient<clblast::TestXher2k<float2,float>, float2, float>(argc, argv); break; + clblast::RunClient<clblast::TestXher2k<clblast::float2,float>, clblast::float2, float>(argc, argv); break; case clblast::Precision::kComplexDouble: - 
clblast::RunClient<clblast::TestXher2k<double2,double>, double2, double>(argc, argv); break; + clblast::RunClient<clblast::TestXher2k<clblast::double2,double>, clblast::double2, double>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level3/xherk.cpp b/test/performance/routines/level3/xherk.cpp index 94411e5a..ae6e774e 100644 --- a/test/performance/routines/level3/xherk.cpp +++ b/test/performance/routines/level3/xherk.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level3/xherk.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -24,9 +20,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kComplexSingle: - clblast::RunClient<clblast::TestXherk<float2,float>, float2, float>(argc, argv); break; + clblast::RunClient<clblast::TestXherk<clblast::float2,float>, clblast::float2, float>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient<clblast::TestXherk<double2,double>, double2, double>(argc, argv); break; + clblast::RunClient<clblast::TestXherk<clblast::double2,double>, clblast::double2, double>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level3/xsymm.cpp b/test/performance/routines/level3/xsymm.cpp index 04ae8eb0..ba3b6ab2 100644 --- a/test/performance/routines/level3/xsymm.cpp +++ b/test/performance/routines/level3/xsymm.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level3/xsymm.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; 
- // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -27,9 +23,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kDouble: clblast::RunClient<clblast::TestXsymm<double>, double, double>(argc, argv); break; case clblast::Precision::kComplexSingle: - clblast::RunClient<clblast::TestXsymm<float2>, float2, float2>(argc, argv); break; + clblast::RunClient<clblast::TestXsymm<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient<clblast::TestXsymm<double2>, double2, double2>(argc, argv); break; + clblast::RunClient<clblast::TestXsymm<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level3/xsyr2k.cpp b/test/performance/routines/level3/xsyr2k.cpp index 7b8b6f4f..150a4192 100644 --- a/test/performance/routines/level3/xsyr2k.cpp +++ b/test/performance/routines/level3/xsyr2k.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level3/xsyr2k.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -27,9 +23,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kDouble: clblast::RunClient<clblast::TestXsyr2k<double>, double, double>(argc, argv); break; case clblast::Precision::kComplexSingle: - clblast::RunClient<clblast::TestXsyr2k<float2>, float2, float2>(argc, argv); break; + clblast::RunClient<clblast::TestXsyr2k<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient<clblast::TestXsyr2k<double2>, double2, double2>(argc, argv); break; + 
clblast::RunClient<clblast::TestXsyr2k<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level3/xsyrk.cpp b/test/performance/routines/level3/xsyrk.cpp index ea0fc33b..00cef52b 100644 --- a/test/performance/routines/level3/xsyrk.cpp +++ b/test/performance/routines/level3/xsyrk.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level3/xsyrk.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -27,9 +23,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kDouble: clblast::RunClient<clblast::TestXsyrk<double>, double, double>(argc, argv); break; case clblast::Precision::kComplexSingle: - clblast::RunClient<clblast::TestXsyrk<float2>, float2, float2>(argc, argv); break; + clblast::RunClient<clblast::TestXsyrk<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient<clblast::TestXsyrk<double2>, double2, double2>(argc, argv); break; + clblast::RunClient<clblast::TestXsyrk<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level3/xtrmm.cpp b/test/performance/routines/level3/xtrmm.cpp index 7a29e111..fb54a410 100644 --- a/test/performance/routines/level3/xtrmm.cpp +++ b/test/performance/routines/level3/xtrmm.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level3/xtrmm.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = 
clblast::RetrieveCommandLineArguments(argc, argv); @@ -27,9 +23,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kDouble: clblast::RunClient<clblast::TestXtrmm<double>, double, double>(argc, argv); break; case clblast::Precision::kComplexSingle: - clblast::RunClient<clblast::TestXtrmm<float2>, float2, float2>(argc, argv); break; + clblast::RunClient<clblast::TestXtrmm<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient<clblast::TestXtrmm<double2>, double2, double2>(argc, argv); break; + clblast::RunClient<clblast::TestXtrmm<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level3/xtrsm.cpp b/test/performance/routines/level3/xtrsm.cpp index 342274b7..f44265f2 100644 --- a/test/performance/routines/level3/xtrsm.cpp +++ b/test/performance/routines/level3/xtrsm.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level3/xtrsm.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -26,9 +22,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kDouble: clblast::RunClient<clblast::TestXtrsm<double>, double, double>(argc, argv); break; case clblast::Precision::kComplexSingle: - clblast::RunClient<clblast::TestXtrsm<float2>, float2, float2>(argc, argv); break; + clblast::RunClient<clblast::TestXtrsm<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient<clblast::TestXtrsm<double2>, double2, double2>(argc, argv); break; + clblast::RunClient<clblast::TestXtrsm<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff 
--git a/test/performance/routines/levelx/xaxpybatched.cpp b/test/performance/routines/levelx/xaxpybatched.cpp index 6d3bcb51..7c09cd5b 100644 --- a/test/performance/routines/levelx/xaxpybatched.cpp +++ b/test/performance/routines/levelx/xaxpybatched.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/levelx/xaxpybatched.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -27,9 +23,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kDouble: clblast::RunClient<clblast::TestXaxpyBatched<double>, double, double>(argc, argv); break; case clblast::Precision::kComplexSingle: - clblast::RunClient<clblast::TestXaxpyBatched<float2>, float2, float2>(argc, argv); break; + clblast::RunClient<clblast::TestXaxpyBatched<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient<clblast::TestXaxpyBatched<double2>, double2, double2>(argc, argv); break; + clblast::RunClient<clblast::TestXaxpyBatched<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/levelx/xgemmbatched.cpp b/test/performance/routines/levelx/xgemmbatched.cpp index c9477fad..f4c860d8 100644 --- a/test/performance/routines/levelx/xgemmbatched.cpp +++ b/test/performance/routines/levelx/xgemmbatched.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/levelx/xgemmbatched.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -27,9 +23,9 
@@ int main(int argc, char *argv[]) { case clblast::Precision::kDouble: clblast::RunClient<clblast::TestXgemmBatched<double>, double, double>(argc, argv); break; case clblast::Precision::kComplexSingle: - clblast::RunClient<clblast::TestXgemmBatched<float2>, float2, float2>(argc, argv); break; + clblast::RunClient<clblast::TestXgemmBatched<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient<clblast::TestXgemmBatched<double2>, double2, double2>(argc, argv); break; + clblast::RunClient<clblast::TestXgemmBatched<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/levelx/xomatcopy.cpp b/test/performance/routines/levelx/xomatcopy.cpp index 5821c3b8..568f22e6 100644 --- a/test/performance/routines/levelx/xomatcopy.cpp +++ b/test/performance/routines/levelx/xomatcopy.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/levelx/xomatcopy.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -27,9 +23,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kDouble: clblast::RunClient<clblast::TestXomatcopy<double>, double, double>(argc, argv); break; case clblast::Precision::kComplexSingle: - clblast::RunClient<clblast::TestXomatcopy<float2>, float2, float2>(argc, argv); break; + clblast::RunClient<clblast::TestXomatcopy<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient<clblast::TestXomatcopy<double2>, double2, double2>(argc, argv); break; + clblast::RunClient<clblast::TestXomatcopy<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break; } return 0; 
} diff --git a/test/routines/common.hpp b/test/routines/common.hpp new file mode 100644 index 00000000..1abf5528 --- /dev/null +++ b/test/routines/common.hpp @@ -0,0 +1,35 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file contains all the common includes for the clients and tests +// +// ================================================================================================= + +#ifndef CLBLAST_TEST_ROUTINES_COMMON_H_ +#define CLBLAST_TEST_ROUTINES_COMMON_H_ + +#include <vector> +#include <string> + +#include "utilities/utilities.hpp" + +#ifdef CLBLAST_REF_CLBLAS + #include "test/wrapper_clblas.hpp" +#endif +#ifdef CLBLAST_REF_CBLAS + #include "test/wrapper_cblas.hpp" +#endif +#ifdef CLBLAST_REF_CUBLAS + #include "test/wrapper_cuda.hpp" +#endif + +// ================================================================================================= + +// CLBLAST_TEST_ROUTINES_COMMON_H_ +#endif diff --git a/test/routines/level1/xamax.hpp b/test/routines/level1/xamax.hpp index 2e844f2c..fccefc73 100644 --- a/test/routines/level1/xamax.hpp +++ b/test/routines/level1/xamax.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XAMAX_H_ #define CLBLAST_TEST_ROUTINES_XAMAX_H_ -#include <vector> -#include <string> - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level1/xasum.hpp b/test/routines/level1/xasum.hpp index 
8488bfc6..f0fca4d3 100644 --- a/test/routines/level1/xasum.hpp +++ b/test/routines/level1/xasum.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XASUM_H_ #define CLBLAST_TEST_ROUTINES_XASUM_H_ -#include <vector> -#include <string> - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level1/xaxpy.hpp b/test/routines/level1/xaxpy.hpp index cc7d251a..8426d739 100644 --- a/test/routines/level1/xaxpy.hpp +++ b/test/routines/level1/xaxpy.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XAXPY_H_ #define CLBLAST_TEST_ROUTINES_XAXPY_H_ -#include <vector> -#include <string> - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level1/xcopy.hpp b/test/routines/level1/xcopy.hpp index 0dbf0f3d..d1e7f49e 100644 --- a/test/routines/level1/xcopy.hpp +++ b/test/routines/level1/xcopy.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XCOPY_H_ #define CLBLAST_TEST_ROUTINES_XCOPY_H_ -#include <vector> -#include <string> - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level1/xdot.hpp b/test/routines/level1/xdot.hpp index bdf2e721..cb3d7979 100644 --- a/test/routines/level1/xdot.hpp +++ b/test/routines/level1/xdot.hpp @@ -16,15 +16,7 @@ #ifndef 
CLBLAST_TEST_ROUTINES_XDOT_H_ #define CLBLAST_TEST_ROUTINES_XDOT_H_ -#include <vector> -#include <string> - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level1/xdotc.hpp b/test/routines/level1/xdotc.hpp index 2cc71b93..10ecbda6 100644 --- a/test/routines/level1/xdotc.hpp +++ b/test/routines/level1/xdotc.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XDOTC_H_ #define CLBLAST_TEST_ROUTINES_XDOTC_H_ -#include <vector> -#include <string> - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level1/xdotu.hpp b/test/routines/level1/xdotu.hpp index 272e1e31..6efd270e 100644 --- a/test/routines/level1/xdotu.hpp +++ b/test/routines/level1/xdotu.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XDOTU_H_ #define CLBLAST_TEST_ROUTINES_XDOTU_H_ -#include <vector> -#include <string> - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level1/xnrm2.hpp b/test/routines/level1/xnrm2.hpp index cb1ec683..0ba24b13 100644 --- a/test/routines/level1/xnrm2.hpp +++ b/test/routines/level1/xnrm2.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XNRM2_H_ #define CLBLAST_TEST_ROUTINES_XNRM2_H_ -#include <vector> -#include <string> - -#ifdef CLBLAST_REF_CLBLAS - 
#include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level1/xscal.hpp b/test/routines/level1/xscal.hpp index 3e6b9a38..e7db434e 100644 --- a/test/routines/level1/xscal.hpp +++ b/test/routines/level1/xscal.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XSCAL_H_ #define CLBLAST_TEST_ROUTINES_XSCAL_H_ -#include <vector> -#include <string> - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level1/xswap.hpp b/test/routines/level1/xswap.hpp index d9b84dc4..64feb744 100644 --- a/test/routines/level1/xswap.hpp +++ b/test/routines/level1/xswap.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XSWAP_H_ #define CLBLAST_TEST_ROUTINES_XSWAP_H_ -#include <vector> -#include <string> - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level2/xgbmv.hpp b/test/routines/level2/xgbmv.hpp index 990ef49f..fb36d7f2 100644 --- a/test/routines/level2/xgbmv.hpp +++ b/test/routines/level2/xgbmv.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XGBMV_H_ #define CLBLAST_TEST_ROUTINES_XGBMV_H_ -#include <vector> -#include <string> - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include 
"test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level2/xgemv.hpp b/test/routines/level2/xgemv.hpp index a007cb62..4654838e 100644 --- a/test/routines/level2/xgemv.hpp +++ b/test/routines/level2/xgemv.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XGEMV_H_ #define CLBLAST_TEST_ROUTINES_XGEMV_H_ -#include <vector> -#include <string> - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level2/xger.hpp b/test/routines/level2/xger.hpp index 5c131e2d..9d1dec13 100644 --- a/test/routines/level2/xger.hpp +++ b/test/routines/level2/xger.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XGER_H_ #define CLBLAST_TEST_ROUTINES_XGER_H_ -#include <vector> -#include <string> - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level2/xgerc.hpp b/test/routines/level2/xgerc.hpp index e3544424..efa72744 100644 --- a/test/routines/level2/xgerc.hpp +++ b/test/routines/level2/xgerc.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XGERC_H_ #define CLBLAST_TEST_ROUTINES_XGERC_H_ -#include <vector> -#include <string> - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // 
================================================================================================= diff --git a/test/routines/level2/xgeru.hpp b/test/routines/level2/xgeru.hpp index 1d81e292..cb14636e 100644 --- a/test/routines/level2/xgeru.hpp +++ b/test/routines/level2/xgeru.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XGERU_H_ #define CLBLAST_TEST_ROUTINES_XGERU_H_ -#include <vector> -#include <string> - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level2/xhbmv.hpp b/test/routines/level2/xhbmv.hpp index 21194fd6..f41cc572 100644 --- a/test/routines/level2/xhbmv.hpp +++ b/test/routines/level2/xhbmv.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XHBMV_H_ #define CLBLAST_TEST_ROUTINES_XHBMV_H_ -#include <vector> -#include <string> - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level2/xhemv.hpp b/test/routines/level2/xhemv.hpp index ffef8ff8..9f5aca00 100644 --- a/test/routines/level2/xhemv.hpp +++ b/test/routines/level2/xhemv.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XHEMV_H_ #define CLBLAST_TEST_ROUTINES_XHEMV_H_ -#include <vector> -#include <string> - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git 
a/test/routines/level2/xher.hpp b/test/routines/level2/xher.hpp index 083bd3fc..ef0afd1c 100644 --- a/test/routines/level2/xher.hpp +++ b/test/routines/level2/xher.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XHER_H_ #define CLBLAST_TEST_ROUTINES_XHER_H_ -#include <vector> -#include <string> - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level2/xher2.hpp b/test/routines/level2/xher2.hpp index 7bd890a5..d4b06c49 100644 --- a/test/routines/level2/xher2.hpp +++ b/test/routines/level2/xher2.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XHER2_H_ #define CLBLAST_TEST_ROUTINES_XHER2_H_ -#include <vector> -#include <string> - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level2/xhpmv.hpp b/test/routines/level2/xhpmv.hpp index 285dd6d3..52f70dc9 100644 --- a/test/routines/level2/xhpmv.hpp +++ b/test/routines/level2/xhpmv.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XHPMV_H_ #define CLBLAST_TEST_ROUTINES_XHPMV_H_ -#include <vector> -#include <string> - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level2/xhpr.hpp b/test/routines/level2/xhpr.hpp index 88bae86b..39112e49 100644 --- a/test/routines/level2/xhpr.hpp +++ 
b/test/routines/level2/xhpr.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XHPR_H_ #define CLBLAST_TEST_ROUTINES_XHPR_H_ -#include <vector> -#include <string> - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level2/xhpr2.hpp b/test/routines/level2/xhpr2.hpp index cd10fa00..21f0970a 100644 --- a/test/routines/level2/xhpr2.hpp +++ b/test/routines/level2/xhpr2.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XHPR2_H_ #define CLBLAST_TEST_ROUTINES_XHPR2_H_ -#include <vector> -#include <string> - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level2/xsbmv.hpp b/test/routines/level2/xsbmv.hpp index 5c70aba5..94e60dd2 100644 --- a/test/routines/level2/xsbmv.hpp +++ b/test/routines/level2/xsbmv.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XSBMV_H_ #define CLBLAST_TEST_ROUTINES_XSBMV_H_ -#include <vector> -#include <string> - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level2/xspmv.hpp b/test/routines/level2/xspmv.hpp index 560f5baa..02bfd4e3 100644 --- a/test/routines/level2/xspmv.hpp +++ b/test/routines/level2/xspmv.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XSPMV_H_ #define CLBLAST_TEST_ROUTINES_XSPMV_H_ -#include 
<vector> -#include <string> - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level2/xspr.hpp b/test/routines/level2/xspr.hpp index 2e12db33..9d992eb2 100644 --- a/test/routines/level2/xspr.hpp +++ b/test/routines/level2/xspr.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XSPR_H_ #define CLBLAST_TEST_ROUTINES_XSPR_H_ -#include <vector> -#include <string> - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level2/xspr2.hpp b/test/routines/level2/xspr2.hpp index a7e22227..520bf412 100644 --- a/test/routines/level2/xspr2.hpp +++ b/test/routines/level2/xspr2.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XSPR2_H_ #define CLBLAST_TEST_ROUTINES_XSPR2_H_ -#include <vector> -#include <string> - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level2/xsymv.hpp b/test/routines/level2/xsymv.hpp index d9cf9c1e..130fee49 100644 --- a/test/routines/level2/xsymv.hpp +++ b/test/routines/level2/xsymv.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XSYMV_H_ #define CLBLAST_TEST_ROUTINES_XSYMV_H_ -#include <vector> -#include <string> - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include 
"test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level2/xsyr.hpp b/test/routines/level2/xsyr.hpp index b60c3a36..2eb07f9b 100644 --- a/test/routines/level2/xsyr.hpp +++ b/test/routines/level2/xsyr.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XSYR_H_ #define CLBLAST_TEST_ROUTINES_XSYR_H_ -#include <vector> -#include <string> - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level2/xsyr2.hpp b/test/routines/level2/xsyr2.hpp index dd10a3d0..5c3598c5 100644 --- a/test/routines/level2/xsyr2.hpp +++ b/test/routines/level2/xsyr2.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XSYR2_H_ #define CLBLAST_TEST_ROUTINES_XSYR2_H_ -#include <vector> -#include <string> - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level2/xtbmv.hpp b/test/routines/level2/xtbmv.hpp index 7eb8ce9e..7ef67424 100644 --- a/test/routines/level2/xtbmv.hpp +++ b/test/routines/level2/xtbmv.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XTBMV_H_ #define CLBLAST_TEST_ROUTINES_XTBMV_H_ -#include <vector> -#include <string> - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // 
================================================================================================= diff --git a/test/routines/level2/xtpmv.hpp b/test/routines/level2/xtpmv.hpp index 7f4842f0..6cea7061 100644 --- a/test/routines/level2/xtpmv.hpp +++ b/test/routines/level2/xtpmv.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XTPMV_H_ #define CLBLAST_TEST_ROUTINES_XTPMV_H_ -#include <vector> -#include <string> - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level2/xtrmv.hpp b/test/routines/level2/xtrmv.hpp index cb7527ed..7c97c966 100644 --- a/test/routines/level2/xtrmv.hpp +++ b/test/routines/level2/xtrmv.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XTRMV_H_ #define CLBLAST_TEST_ROUTINES_XTRMV_H_ -#include <vector> -#include <string> - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level2/xtrsv.hpp b/test/routines/level2/xtrsv.hpp index 63d34758..18a3cef5 100644 --- a/test/routines/level2/xtrsv.hpp +++ b/test/routines/level2/xtrsv.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XTRSV_H_ #define CLBLAST_TEST_ROUTINES_XTRSV_H_ -#include <vector> -#include <string> - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git 
a/test/routines/level3/xgemm.hpp b/test/routines/level3/xgemm.hpp index a33cbfec..d6ad98f9 100644 --- a/test/routines/level3/xgemm.hpp +++ b/test/routines/level3/xgemm.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XGEMM_H_ #define CLBLAST_TEST_ROUTINES_XGEMM_H_ -#include <vector> -#include <string> - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level3/xhemm.hpp b/test/routines/level3/xhemm.hpp index 74029c7e..beadf62d 100644 --- a/test/routines/level3/xhemm.hpp +++ b/test/routines/level3/xhemm.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XHEMM_H_ #define CLBLAST_TEST_ROUTINES_XHEMM_H_ -#include <vector> -#include <string> - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level3/xher2k.hpp b/test/routines/level3/xher2k.hpp index ea13bbc1..b5d22579 100644 --- a/test/routines/level3/xher2k.hpp +++ b/test/routines/level3/xher2k.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XHER2K_H_ #define CLBLAST_TEST_ROUTINES_XHER2K_H_ -#include <vector> -#include <string> - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level3/xherk.hpp b/test/routines/level3/xherk.hpp index b1ce83e0..558f4e76 100644 --- 
a/test/routines/level3/xherk.hpp +++ b/test/routines/level3/xherk.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XHERK_H_ #define CLBLAST_TEST_ROUTINES_XHERK_H_ -#include <vector> -#include <string> - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level3/xsymm.hpp b/test/routines/level3/xsymm.hpp index 6ab644b8..704a8f9e 100644 --- a/test/routines/level3/xsymm.hpp +++ b/test/routines/level3/xsymm.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XSYMM_H_ #define CLBLAST_TEST_ROUTINES_XSYMM_H_ -#include <vector> -#include <string> - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level3/xsyr2k.hpp b/test/routines/level3/xsyr2k.hpp index 1400c4e2..c321b9cf 100644 --- a/test/routines/level3/xsyr2k.hpp +++ b/test/routines/level3/xsyr2k.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XSYR2K_H_ #define CLBLAST_TEST_ROUTINES_XSYR2K_H_ -#include <vector> -#include <string> - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level3/xsyrk.hpp b/test/routines/level3/xsyrk.hpp index 2df8d6b0..00a3013d 100644 --- a/test/routines/level3/xsyrk.hpp +++ b/test/routines/level3/xsyrk.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XSYRK_H_ 
#define CLBLAST_TEST_ROUTINES_XSYRK_H_ -#include <vector> -#include <string> - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level3/xtrmm.hpp b/test/routines/level3/xtrmm.hpp index 84adc6e0..660001df 100644 --- a/test/routines/level3/xtrmm.hpp +++ b/test/routines/level3/xtrmm.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XTRMM_H_ #define CLBLAST_TEST_ROUTINES_XTRMM_H_ -#include <vector> -#include <string> - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level3/xtrsm.hpp b/test/routines/level3/xtrsm.hpp index de5b307d..9e8b9565 100644 --- a/test/routines/level3/xtrsm.hpp +++ b/test/routines/level3/xtrsm.hpp @@ -16,18 +16,9 @@ #ifndef CLBLAST_TEST_ROUTINES_XTRSM_H_ #define CLBLAST_TEST_ROUTINES_XTRSM_H_ -#include <vector> -#include <string> - +#include "test/routines/common.hpp" #include "test/routines/level3/xtrsm_data.hpp" -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif - namespace clblast { // ================================================================================================= diff --git a/test/routines/levelx/xaxpybatched.hpp b/test/routines/levelx/xaxpybatched.hpp index 05141bbb..d8b3837c 100644 --- a/test/routines/levelx/xaxpybatched.hpp +++ b/test/routines/levelx/xaxpybatched.hpp @@ -16,17 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XAXPYBATCHED_H_ #define CLBLAST_TEST_ROUTINES_XAXPYBATCHED_H_ -#include 
<vector> -#include <string> - -#include "utilities/utilities.hpp" - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/levelx/xgemmbatched.hpp b/test/routines/levelx/xgemmbatched.hpp index ab5f20c5..e13e9382 100644 --- a/test/routines/levelx/xgemmbatched.hpp +++ b/test/routines/levelx/xgemmbatched.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XGEMMBATCHED_H_ #define CLBLAST_TEST_ROUTINES_XGEMMBATCHED_H_ -#include <vector> -#include <string> - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/wrapper_cblas.hpp b/test/wrapper_cblas.hpp index dd610a6c..070d44b5 100644 --- a/test/wrapper_cblas.hpp +++ b/test/wrapper_cblas.hpp @@ -94,7 +94,7 @@ void cblasXrot(const size_t n, std::vector<float>& y_buffer, const size_t y_offset, const size_t y_inc, const float cos, const float sin) { - cblas_srot(n, + cblas_srot(static_cast<int>(n), &x_buffer[x_offset], static_cast<int>(x_inc), &y_buffer[y_offset], static_cast<int>(y_inc), cos, @@ -105,7 +105,7 @@ void cblasXrot(const size_t n, std::vector<double>& y_buffer, const size_t y_offset, const size_t y_inc, const double cos, const double sin) { - cblas_drot(n, + cblas_drot(static_cast<int>(n), &x_buffer[x_offset], static_cast<int>(x_inc), &y_buffer[y_offset], static_cast<int>(y_inc), cos, @@ -117,7 +117,7 @@ void cblasXrotm(const size_t n, std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc, std::vector<float>& y_buffer, const size_t y_offset, const size_t y_inc, 
std::vector<float>& sparam_buffer, const size_t sparam_offset) { - cblas_srotm(n, + cblas_srotm(static_cast<int>(n), &x_buffer[x_offset], static_cast<int>(x_inc), &y_buffer[y_offset], static_cast<int>(y_inc), &sparam_buffer[sparam_offset]); @@ -126,7 +126,7 @@ void cblasXrotm(const size_t n, std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc, std::vector<double>& y_buffer, const size_t y_offset, const size_t y_inc, std::vector<double>& sparam_buffer, const size_t sparam_offset) { - cblas_drotm(n, + cblas_drotm(static_cast<int>(n), &x_buffer[x_offset], static_cast<int>(x_inc), &y_buffer[y_offset], static_cast<int>(y_inc), &sparam_buffer[sparam_offset]); @@ -136,28 +136,28 @@ void cblasXrotm(const size_t n, void cblasXswap(const size_t n, std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc, std::vector<float>& y_buffer, const size_t y_offset, const size_t y_inc) { - cblas_sswap(n, + cblas_sswap(static_cast<int>(n), &x_buffer[x_offset], static_cast<int>(x_inc), &y_buffer[y_offset], static_cast<int>(y_inc)); } void cblasXswap(const size_t n, std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc, std::vector<double>& y_buffer, const size_t y_offset, const size_t y_inc) { - cblas_dswap(n, + cblas_dswap(static_cast<int>(n), &x_buffer[x_offset], static_cast<int>(x_inc), &y_buffer[y_offset], static_cast<int>(y_inc)); } void cblasXswap(const size_t n, std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc, std::vector<float2>& y_buffer, const size_t y_offset, const size_t y_inc) { - cblas_cswap(n, + cblas_cswap(static_cast<int>(n), reinterpret_cast<float*>(&x_buffer[x_offset]), static_cast<int>(x_inc), reinterpret_cast<float*>(&y_buffer[y_offset]), static_cast<int>(y_inc)); } void cblasXswap(const size_t n, std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc, std::vector<double2>& y_buffer, const size_t y_offset, const size_t y_inc) { - cblas_zswap(n, + 
cblas_zswap(static_cast<int>(n), reinterpret_cast<double*>(&x_buffer[x_offset]), static_cast<int>(x_inc), reinterpret_cast<double*>(&y_buffer[y_offset]), static_cast<int>(y_inc)); } @@ -177,14 +177,14 @@ void cblasXswap(const size_t n, void cblasXscal(const size_t n, const float alpha, std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc) { - cblas_sscal(n, + cblas_sscal(static_cast<int>(n), alpha, &x_buffer[x_offset], static_cast<int>(x_inc)); } void cblasXscal(const size_t n, const double alpha, std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc) { - cblas_dscal(n, + cblas_dscal(static_cast<int>(n), alpha, &x_buffer[x_offset], static_cast<int>(x_inc)); } @@ -192,7 +192,7 @@ void cblasXscal(const size_t n, const float2 alpha, std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc) { const auto alpha_array = std::vector<float>{alpha.real(), alpha.imag()}; - cblas_cscal(n, + cblas_cscal(static_cast<int>(n), alpha_array.data(), reinterpret_cast<float*>(&x_buffer[x_offset]), static_cast<int>(x_inc)); } @@ -200,7 +200,7 @@ void cblasXscal(const size_t n, const double2 alpha, std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc) { const auto alpha_array = std::vector<double>{alpha.real(), alpha.imag()}; - cblas_zscal(n, + cblas_zscal(static_cast<int>(n), alpha_array.data(), reinterpret_cast<double*>(&x_buffer[x_offset]), static_cast<int>(x_inc)); } @@ -218,28 +218,28 @@ void cblasXscal(const size_t n, void cblasXcopy(const size_t n, const std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc, std::vector<float>& y_buffer, const size_t y_offset, const size_t y_inc) { - cblas_scopy(n, + cblas_scopy(static_cast<int>(n), &x_buffer[x_offset], static_cast<int>(x_inc), &y_buffer[y_offset], static_cast<int>(y_inc)); } void cblasXcopy(const size_t n, const std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc, std::vector<double>& y_buffer, const size_t y_offset, 
const size_t y_inc) { - cblas_dcopy(n, + cblas_dcopy(static_cast<int>(n), &x_buffer[x_offset], static_cast<int>(x_inc), &y_buffer[y_offset], static_cast<int>(y_inc)); } void cblasXcopy(const size_t n, const std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc, std::vector<float2>& y_buffer, const size_t y_offset, const size_t y_inc) { - cblas_ccopy(n, + cblas_ccopy(static_cast<int>(n), reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc), reinterpret_cast<float*>(&y_buffer[y_offset]), static_cast<int>(y_inc)); } void cblasXcopy(const size_t n, const std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc, std::vector<double2>& y_buffer, const size_t y_offset, const size_t y_inc) { - cblas_zcopy(n, + cblas_zcopy(static_cast<int>(n), reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc), reinterpret_cast<double*>(&y_buffer[y_offset]), static_cast<int>(y_inc)); } @@ -259,7 +259,7 @@ void cblasXaxpy(const size_t n, const float alpha, const std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc, std::vector<float>& y_buffer, const size_t y_offset, const size_t y_inc) { - cblas_saxpy(n, + cblas_saxpy(static_cast<int>(n), alpha, &x_buffer[x_offset], static_cast<int>(x_inc), &y_buffer[y_offset], static_cast<int>(y_inc)); @@ -268,7 +268,7 @@ void cblasXaxpy(const size_t n, const double alpha, const std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc, std::vector<double>& y_buffer, const size_t y_offset, const size_t y_inc) { - cblas_daxpy(n, + cblas_daxpy(static_cast<int>(n), alpha, &x_buffer[x_offset], static_cast<int>(x_inc), &y_buffer[y_offset], static_cast<int>(y_inc)); @@ -278,7 +278,7 @@ void cblasXaxpy(const size_t n, const std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc, std::vector<float2>& y_buffer, const size_t y_offset, const size_t y_inc) { const auto alpha_array = std::vector<float>{alpha.real(), alpha.imag()}; - 
cblas_caxpy(n, + cblas_caxpy(static_cast<int>(n), alpha_array.data(), reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc), reinterpret_cast<float*>(&y_buffer[y_offset]), static_cast<int>(y_inc)); @@ -288,7 +288,7 @@ void cblasXaxpy(const size_t n, const std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc, std::vector<double2>& y_buffer, const size_t y_offset, const size_t y_inc) { const auto alpha_array = std::vector<double>{alpha.real(), alpha.imag()}; - cblas_zaxpy(n, + cblas_zaxpy(static_cast<int>(n), alpha_array.data(), reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc), reinterpret_cast<double*>(&y_buffer[y_offset]), static_cast<int>(y_inc)); @@ -311,7 +311,7 @@ void cblasXdot(const size_t n, std::vector<float>& dot_buffer, const size_t dot_offset, const std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc, const std::vector<float>& y_buffer, const size_t y_offset, const size_t y_inc) { - dot_buffer[dot_offset] = cblas_sdot(n, + dot_buffer[dot_offset] = cblas_sdot(static_cast<int>(n), &x_buffer[x_offset], static_cast<int>(x_inc), &y_buffer[y_offset], static_cast<int>(y_inc)); } @@ -319,7 +319,7 @@ void cblasXdot(const size_t n, std::vector<double>& dot_buffer, const size_t dot_offset, const std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc, const std::vector<double>& y_buffer, const size_t y_offset, const size_t y_inc) { - dot_buffer[dot_offset] = cblas_ddot(n, + dot_buffer[dot_offset] = cblas_ddot(static_cast<int>(n), &x_buffer[x_offset], static_cast<int>(x_inc), &y_buffer[y_offset], static_cast<int>(y_inc)); } @@ -342,7 +342,7 @@ void cblasXdotu(const size_t n, std::vector<float2>& dot_buffer, const size_t dot_offset, const std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc, const std::vector<float2>& y_buffer, const size_t y_offset, const size_t y_inc) { - cblas_cdotu_sub(n, + cblas_cdotu_sub(static_cast<int>(n), 
reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc), reinterpret_cast<const float*>(&y_buffer[y_offset]), static_cast<int>(y_inc), reinterpret_cast<return_pointer_float>(&dot_buffer[dot_offset])); @@ -351,7 +351,7 @@ void cblasXdotu(const size_t n, std::vector<double2>& dot_buffer, const size_t dot_offset, const std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc, const std::vector<double2>& y_buffer, const size_t y_offset, const size_t y_inc) { - cblas_zdotu_sub(n, + cblas_zdotu_sub(static_cast<int>(n), reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc), reinterpret_cast<const double*>(&y_buffer[y_offset]), static_cast<int>(y_inc), reinterpret_cast<return_pointer_double>(&dot_buffer[dot_offset])); @@ -362,7 +362,7 @@ void cblasXdotc(const size_t n, std::vector<float2>& dot_buffer, const size_t dot_offset, const std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc, const std::vector<float2>& y_buffer, const size_t y_offset, const size_t y_inc) { - cblas_cdotc_sub(n, + cblas_cdotc_sub(static_cast<int>(n), reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc), reinterpret_cast<const float*>(&y_buffer[y_offset]), static_cast<int>(y_inc), reinterpret_cast<return_pointer_float>(&dot_buffer[dot_offset])); @@ -371,7 +371,7 @@ void cblasXdotc(const size_t n, std::vector<double2>& dot_buffer, const size_t dot_offset, const std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc, const std::vector<double2>& y_buffer, const size_t y_offset, const size_t y_inc) { - cblas_zdotc_sub(n, + cblas_zdotc_sub(static_cast<int>(n), reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc), reinterpret_cast<const double*>(&y_buffer[y_offset]), static_cast<int>(y_inc), reinterpret_cast<return_pointer_double>(&dot_buffer[dot_offset])); @@ -381,25 +381,25 @@ void cblasXdotc(const size_t n, void cblasXnrm2(const size_t n, 
std::vector<float>& nrm2_buffer, const size_t nrm2_offset, const std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc) { - nrm2_buffer[nrm2_offset] = cblas_snrm2(n, + nrm2_buffer[nrm2_offset] = cblas_snrm2(static_cast<int>(n), &x_buffer[x_offset], static_cast<int>(x_inc)); } void cblasXnrm2(const size_t n, std::vector<double>& nrm2_buffer, const size_t nrm2_offset, const std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc) { - nrm2_buffer[nrm2_offset] = cblas_dnrm2(n, + nrm2_buffer[nrm2_offset] = cblas_dnrm2(static_cast<int>(n), &x_buffer[x_offset], static_cast<int>(x_inc)); } void cblasXnrm2(const size_t n, std::vector<float2>& nrm2_buffer, const size_t nrm2_offset, const std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc) { - nrm2_buffer[nrm2_offset].real(cblas_scnrm2(n, + nrm2_buffer[nrm2_offset].real(cblas_scnrm2(static_cast<int>(n), reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc))); } void cblasXnrm2(const size_t n, std::vector<double2>& nrm2_buffer, const size_t nrm2_offset, const std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc) { - nrm2_buffer[nrm2_offset].real(cblas_dznrm2(n, + nrm2_buffer[nrm2_offset].real(cblas_dznrm2(static_cast<int>(n), reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc))); } void cblasXnrm2(const size_t n, @@ -417,25 +417,25 @@ void cblasXnrm2(const size_t n, void cblasXasum(const size_t n, std::vector<float>& asum_buffer, const size_t asum_offset, const std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc) { - asum_buffer[asum_offset] = cblas_sasum(n, + asum_buffer[asum_offset] = cblas_sasum(static_cast<int>(n), &x_buffer[x_offset], static_cast<int>(x_inc)); } void cblasXasum(const size_t n, std::vector<double>& asum_buffer, const size_t asum_offset, const std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc) { - asum_buffer[asum_offset] = cblas_dasum(n, + 
asum_buffer[asum_offset] = cblas_dasum(static_cast<int>(n), &x_buffer[x_offset], static_cast<int>(x_inc)); } void cblasXasum(const size_t n, std::vector<float2>& asum_buffer, const size_t asum_offset, const std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc) { - asum_buffer[asum_offset].real(cblas_scasum(n, + asum_buffer[asum_offset].real(cblas_scasum(static_cast<int>(n), reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc))); } void cblasXasum(const size_t n, std::vector<double2>& asum_buffer, const size_t asum_offset, const std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc) { - asum_buffer[asum_offset].real(cblas_dzasum(n, + asum_buffer[asum_offset].real(cblas_dzasum(static_cast<int>(n), reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc))); } void cblasXasum(const size_t n, @@ -453,25 +453,25 @@ void cblasXasum(const size_t n, void cblasXamax(const size_t n, std::vector<float>& imax_buffer, const size_t imax_offset, const std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc) { - ((int*)&imax_buffer[0])[imax_offset] = cblas_isamax(n, + ((int*)&imax_buffer[0])[imax_offset] = cblas_isamax(static_cast<int>(n), &x_buffer[x_offset], static_cast<int>(x_inc)); } void cblasXamax(const size_t n, std::vector<double>& imax_buffer, const size_t imax_offset, const std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc) { - ((int*)&imax_buffer[0])[imax_offset] = cblas_idamax(n, + ((int*)&imax_buffer[0])[imax_offset] = cblas_idamax(static_cast<int>(n), &x_buffer[x_offset], static_cast<int>(x_inc)); } void cblasXamax(const size_t n, std::vector<float2>& imax_buffer, const size_t imax_offset, const std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc) { - ((int*)&imax_buffer[0])[imax_offset] = cblas_icamax(n, + ((int*)&imax_buffer[0])[imax_offset] = cblas_icamax(static_cast<int>(n), reinterpret_cast<const 
float*>(&x_buffer[x_offset]), static_cast<int>(x_inc)); } void cblasXamax(const size_t n, std::vector<double2>& imax_buffer, const size_t imax_offset, const std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc) { - ((int*)&imax_buffer[0])[imax_offset] = cblas_izamax(n, + ((int*)&imax_buffer[0])[imax_offset] = cblas_izamax(static_cast<int>(n), reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc)); } void cblasXamax(const size_t n, @@ -498,7 +498,7 @@ void cblasXgemv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose, const float beta, std::vector<float>& y_buffer, const size_t y_offset, const size_t y_inc) { cblas_sgemv(layout, a_transpose, - m, n, + static_cast<int>(m), static_cast<int>(n), alpha, &a_buffer[a_offset], a_ld, &x_buffer[x_offset], static_cast<int>(x_inc), @@ -513,7 +513,7 @@ void cblasXgemv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose, const double beta, std::vector<double>& y_buffer, const size_t y_offset, const size_t y_inc) { cblas_dgemv(layout, a_transpose, - m, n, + static_cast<int>(m), static_cast<int>(n), alpha, &a_buffer[a_offset], a_ld, &x_buffer[x_offset], static_cast<int>(x_inc), @@ -530,7 +530,7 @@ void cblasXgemv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose, const auto alpha_array = std::vector<float>{alpha.real(), alpha.imag()}; const auto beta_array = std::vector<float>{beta.real(), beta.imag()}; cblas_cgemv(layout, a_transpose, - m, n, + static_cast<int>(m), static_cast<int>(n), alpha_array.data(), reinterpret_cast<const float*>(&a_buffer[a_offset]), a_ld, reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc), @@ -547,7 +547,7 @@ void cblasXgemv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose, const auto alpha_array = std::vector<double>{alpha.real(), alpha.imag()}; const auto beta_array = std::vector<double>{beta.real(), beta.imag()}; cblas_zgemv(layout, a_transpose, - m, n, + static_cast<int>(m), 
static_cast<int>(n), alpha_array.data(), reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld, reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc), @@ -583,7 +583,7 @@ void cblasXgbmv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose, const float beta, std::vector<float>& y_buffer, const size_t y_offset, const size_t y_inc) { cblas_sgbmv(layout, a_transpose, - m, n, kl, ku, + static_cast<int>(m), static_cast<int>(n), static_cast<int>(kl), static_cast<int>(ku), alpha, &a_buffer[a_offset], a_ld, &x_buffer[x_offset], static_cast<int>(x_inc), @@ -598,7 +598,7 @@ void cblasXgbmv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose, const double beta, std::vector<double>& y_buffer, const size_t y_offset, const size_t y_inc) { cblas_dgbmv(layout, a_transpose, - m, n, kl, ku, + static_cast<int>(m), static_cast<int>(n), static_cast<int>(kl), static_cast<int>(ku), alpha, &a_buffer[a_offset], a_ld, &x_buffer[x_offset], static_cast<int>(x_inc), @@ -615,7 +615,7 @@ void cblasXgbmv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose, const auto alpha_array = std::vector<float>{alpha.real(), alpha.imag()}; const auto beta_array = std::vector<float>{beta.real(), beta.imag()}; cblas_cgbmv(layout, a_transpose, - m, n, kl, ku, + static_cast<int>(m), static_cast<int>(n), static_cast<int>(kl), static_cast<int>(ku), alpha_array.data(), reinterpret_cast<const float*>(&a_buffer[a_offset]), a_ld, reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc), @@ -632,7 +632,7 @@ void cblasXgbmv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose, const auto alpha_array = std::vector<double>{alpha.real(), alpha.imag()}; const auto beta_array = std::vector<double>{beta.real(), beta.imag()}; cblas_zgbmv(layout, a_transpose, - m, n, kl, ku, + static_cast<int>(m), static_cast<int>(n), static_cast<int>(kl), static_cast<int>(ku), alpha_array.data(), reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld, 
reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc), @@ -670,7 +670,7 @@ void cblasXhemv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const auto alpha_array = std::vector<float>{alpha.real(), alpha.imag()}; const auto beta_array = std::vector<float>{beta.real(), beta.imag()}; cblas_chemv(layout, triangle, - n, + static_cast<int>(n), alpha_array.data(), reinterpret_cast<const float*>(&a_buffer[a_offset]), a_ld, reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc), @@ -687,7 +687,7 @@ void cblasXhemv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const auto alpha_array = std::vector<double>{alpha.real(), alpha.imag()}; const auto beta_array = std::vector<double>{beta.real(), beta.imag()}; cblas_zhemv(layout, triangle, - n, + static_cast<int>(n), alpha_array.data(), reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld, reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc), @@ -706,7 +706,7 @@ void cblasXhbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const auto alpha_array = std::vector<float>{alpha.real(), alpha.imag()}; const auto beta_array = std::vector<float>{beta.real(), beta.imag()}; cblas_chbmv(layout, triangle, - n, k, + static_cast<int>(n), static_cast<int>(k), alpha_array.data(), reinterpret_cast<const float*>(&a_buffer[a_offset]), a_ld, reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc), @@ -723,7 +723,7 @@ void cblasXhbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const auto alpha_array = std::vector<double>{alpha.real(), alpha.imag()}; const auto beta_array = std::vector<double>{beta.real(), beta.imag()}; cblas_zhbmv(layout, triangle, - n, k, + static_cast<int>(n), static_cast<int>(k), alpha_array.data(), reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld, reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc), @@ -742,7 +742,7 @@ void cblasXhpmv(const CBLAS_ORDER layout, const 
CBLAS_UPLO triangle, const auto alpha_array = std::vector<float>{alpha.real(), alpha.imag()}; const auto beta_array = std::vector<float>{beta.real(), beta.imag()}; cblas_chpmv(layout, triangle, - n, + static_cast<int>(n), alpha_array.data(), reinterpret_cast<const float*>(&ap_buffer[ap_offset]), reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc), @@ -759,7 +759,7 @@ void cblasXhpmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const auto alpha_array = std::vector<double>{alpha.real(), alpha.imag()}; const auto beta_array = std::vector<double>{beta.real(), beta.imag()}; cblas_zhpmv(layout, triangle, - n, + static_cast<int>(n), alpha_array.data(), reinterpret_cast<const double*>(&ap_buffer[ap_offset]), reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc), @@ -776,7 +776,7 @@ void cblasXsymv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const float beta, std::vector<float>& y_buffer, const size_t y_offset, const size_t y_inc) { cblas_ssymv(layout, triangle, - n, + static_cast<int>(n), alpha, &a_buffer[a_offset], a_ld, &x_buffer[x_offset], static_cast<int>(x_inc), @@ -791,7 +791,7 @@ void cblasXsymv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const double beta, std::vector<double>& y_buffer, const size_t y_offset, const size_t y_inc) { cblas_dsymv(layout, triangle, - n, + static_cast<int>(n), alpha, &a_buffer[a_offset], a_ld, &x_buffer[x_offset], static_cast<int>(x_inc), @@ -827,7 +827,7 @@ void cblasXsbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const float beta, std::vector<float>& y_buffer, const size_t y_offset, const size_t y_inc) { cblas_ssbmv(layout, triangle, - n, k, + static_cast<int>(n), static_cast<int>(k), alpha, &a_buffer[a_offset], a_ld, &x_buffer[x_offset], static_cast<int>(x_inc), @@ -842,7 +842,7 @@ void cblasXsbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const double beta, std::vector<double>& y_buffer, const size_t y_offset, const size_t y_inc) { 
cblas_dsbmv(layout, triangle, - n, k, + static_cast<int>(n), static_cast<int>(k), alpha, &a_buffer[a_offset], a_ld, &x_buffer[x_offset], static_cast<int>(x_inc), @@ -878,7 +878,7 @@ void cblasXspmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const float beta, std::vector<float>& y_buffer, const size_t y_offset, const size_t y_inc) { cblas_sspmv(layout, triangle, - n, + static_cast<int>(n), alpha, &ap_buffer[ap_offset], &x_buffer[x_offset], static_cast<int>(x_inc), @@ -893,7 +893,7 @@ void cblasXspmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const double beta, std::vector<double>& y_buffer, const size_t y_offset, const size_t y_inc) { cblas_dspmv(layout, triangle, - n, + static_cast<int>(n), alpha, &ap_buffer[ap_offset], &x_buffer[x_offset], static_cast<int>(x_inc), @@ -926,7 +926,7 @@ void cblasXtrmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS const std::vector<float>& a_buffer, const size_t a_offset, const size_t a_ld, std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc) { cblas_strmv(layout, triangle, a_transpose, diagonal, - n, + static_cast<int>(n), &a_buffer[a_offset], a_ld, &x_buffer[x_offset], static_cast<int>(x_inc)); } @@ -935,7 +935,7 @@ void cblasXtrmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS const std::vector<double>& a_buffer, const size_t a_offset, const size_t a_ld, std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc) { cblas_dtrmv(layout, triangle, a_transpose, diagonal, - n, + static_cast<int>(n), &a_buffer[a_offset], a_ld, &x_buffer[x_offset], static_cast<int>(x_inc)); } @@ -944,7 +944,7 @@ void cblasXtrmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS const std::vector<float2>& a_buffer, const size_t a_offset, const size_t a_ld, std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc) { cblas_ctrmv(layout, triangle, a_transpose, diagonal, - n, + static_cast<int>(n), reinterpret_cast<const 
float*>(&a_buffer[a_offset]), a_ld, reinterpret_cast<float*>(&x_buffer[x_offset]), static_cast<int>(x_inc)); } @@ -953,7 +953,7 @@ void cblasXtrmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS const std::vector<double2>& a_buffer, const size_t a_offset, const size_t a_ld, std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc) { cblas_ztrmv(layout, triangle, a_transpose, diagonal, - n, + static_cast<int>(n), reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld, reinterpret_cast<double*>(&x_buffer[x_offset]), static_cast<int>(x_inc)); } @@ -976,7 +976,7 @@ void cblasXtbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS const std::vector<float>& a_buffer, const size_t a_offset, const size_t a_ld, std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc) { cblas_stbmv(layout, triangle, a_transpose, diagonal, - n, k, + static_cast<int>(n), static_cast<int>(k), &a_buffer[a_offset], a_ld, &x_buffer[x_offset], static_cast<int>(x_inc)); } @@ -985,7 +985,7 @@ void cblasXtbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS const std::vector<double>& a_buffer, const size_t a_offset, const size_t a_ld, std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc) { cblas_dtbmv(layout, triangle, a_transpose, diagonal, - n, k, + static_cast<int>(n), static_cast<int>(k), &a_buffer[a_offset], a_ld, &x_buffer[x_offset], static_cast<int>(x_inc)); } @@ -994,7 +994,7 @@ void cblasXtbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS const std::vector<float2>& a_buffer, const size_t a_offset, const size_t a_ld, std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc) { cblas_ctbmv(layout, triangle, a_transpose, diagonal, - n, k, + static_cast<int>(n), static_cast<int>(k), reinterpret_cast<const float*>(&a_buffer[a_offset]), a_ld, reinterpret_cast<float*>(&x_buffer[x_offset]), static_cast<int>(x_inc)); } @@ -1003,7 +1003,7 @@ void cblasXtbmv(const 
CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS const std::vector<double2>& a_buffer, const size_t a_offset, const size_t a_ld, std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc) { cblas_ztbmv(layout, triangle, a_transpose, diagonal, - n, k, + static_cast<int>(n), static_cast<int>(k), reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld, reinterpret_cast<double*>(&x_buffer[x_offset]), static_cast<int>(x_inc)); } @@ -1026,7 +1026,7 @@ void cblasXtpmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS const std::vector<float>& ap_buffer, const size_t ap_offset, std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc) { cblas_stpmv(layout, triangle, a_transpose, diagonal, - n, + static_cast<int>(n), &ap_buffer[ap_offset], &x_buffer[x_offset], static_cast<int>(x_inc)); } @@ -1035,7 +1035,7 @@ void cblasXtpmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS const std::vector<double>& ap_buffer, const size_t ap_offset, std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc) { cblas_dtpmv(layout, triangle, a_transpose, diagonal, - n, + static_cast<int>(n), &ap_buffer[ap_offset], &x_buffer[x_offset], static_cast<int>(x_inc)); } @@ -1044,7 +1044,7 @@ void cblasXtpmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS const std::vector<float2>& ap_buffer, const size_t ap_offset, std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc) { cblas_ctpmv(layout, triangle, a_transpose, diagonal, - n, + static_cast<int>(n), reinterpret_cast<const float*>(&ap_buffer[ap_offset]), reinterpret_cast<float*>(&x_buffer[x_offset]), static_cast<int>(x_inc)); } @@ -1053,7 +1053,7 @@ void cblasXtpmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS const std::vector<double2>& ap_buffer, const size_t ap_offset, std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc) { cblas_ztpmv(layout, triangle, a_transpose, diagonal, - n, + 
static_cast<int>(n), reinterpret_cast<const double*>(&ap_buffer[ap_offset]), reinterpret_cast<double*>(&x_buffer[x_offset]), static_cast<int>(x_inc)); } @@ -1076,7 +1076,7 @@ void cblasXtrsv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS const std::vector<float>& a_buffer, const size_t a_offset, const size_t a_ld, std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc) { cblas_strsv(layout, triangle, a_transpose, diagonal, - n, + static_cast<int>(n), &a_buffer[a_offset], a_ld, &x_buffer[x_offset], static_cast<int>(x_inc)); } @@ -1085,7 +1085,7 @@ void cblasXtrsv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS const std::vector<double>& a_buffer, const size_t a_offset, const size_t a_ld, std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc) { cblas_dtrsv(layout, triangle, a_transpose, diagonal, - n, + static_cast<int>(n), &a_buffer[a_offset], a_ld, &x_buffer[x_offset], static_cast<int>(x_inc)); } @@ -1094,7 +1094,7 @@ void cblasXtrsv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS const std::vector<float2>& a_buffer, const size_t a_offset, const size_t a_ld, std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc) { cblas_ctrsv(layout, triangle, a_transpose, diagonal, - n, + static_cast<int>(n), reinterpret_cast<const float*>(&a_buffer[a_offset]), a_ld, reinterpret_cast<float*>(&x_buffer[x_offset]), static_cast<int>(x_inc)); } @@ -1103,7 +1103,7 @@ void cblasXtrsv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS const std::vector<double2>& a_buffer, const size_t a_offset, const size_t a_ld, std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc) { cblas_ztrsv(layout, triangle, a_transpose, diagonal, - n, + static_cast<int>(n), reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld, reinterpret_cast<double*>(&x_buffer[x_offset]), static_cast<int>(x_inc)); } @@ -1114,7 +1114,7 @@ void cblasXtbsv(const CBLAS_ORDER layout, 
const CBLAS_UPLO triangle, const CBLAS const std::vector<float>& a_buffer, const size_t a_offset, const size_t a_ld, std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc) { cblas_stbsv(layout, triangle, a_transpose, diagonal, - n, k, + static_cast<int>(n), static_cast<int>(k), &a_buffer[a_offset], a_ld, &x_buffer[x_offset], static_cast<int>(x_inc)); } @@ -1123,7 +1123,7 @@ void cblasXtbsv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS const std::vector<double>& a_buffer, const size_t a_offset, const size_t a_ld, std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc) { cblas_dtbsv(layout, triangle, a_transpose, diagonal, - n, k, + static_cast<int>(n), static_cast<int>(k), &a_buffer[a_offset], a_ld, &x_buffer[x_offset], static_cast<int>(x_inc)); } @@ -1132,7 +1132,7 @@ void cblasXtbsv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS const std::vector<float2>& a_buffer, const size_t a_offset, const size_t a_ld, std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc) { cblas_ctbsv(layout, triangle, a_transpose, diagonal, - n, k, + static_cast<int>(n), static_cast<int>(k), reinterpret_cast<const float*>(&a_buffer[a_offset]), a_ld, reinterpret_cast<float*>(&x_buffer[x_offset]), static_cast<int>(x_inc)); } @@ -1141,7 +1141,7 @@ void cblasXtbsv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS const std::vector<double2>& a_buffer, const size_t a_offset, const size_t a_ld, std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc) { cblas_ztbsv(layout, triangle, a_transpose, diagonal, - n, k, + static_cast<int>(n), static_cast<int>(k), reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld, reinterpret_cast<double*>(&x_buffer[x_offset]), static_cast<int>(x_inc)); } @@ -1152,7 +1152,7 @@ void cblasXtpsv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS const std::vector<float>& ap_buffer, const size_t ap_offset, std::vector<float>& 
x_buffer, const size_t x_offset, const size_t x_inc) { cblas_stpsv(layout, triangle, a_transpose, diagonal, - n, + static_cast<int>(n), &ap_buffer[ap_offset], &x_buffer[x_offset], static_cast<int>(x_inc)); } @@ -1161,7 +1161,7 @@ void cblasXtpsv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS const std::vector<double>& ap_buffer, const size_t ap_offset, std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc) { cblas_dtpsv(layout, triangle, a_transpose, diagonal, - n, + static_cast<int>(n), &ap_buffer[ap_offset], &x_buffer[x_offset], static_cast<int>(x_inc)); } @@ -1170,7 +1170,7 @@ void cblasXtpsv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS const std::vector<float2>& ap_buffer, const size_t ap_offset, std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc) { cblas_ctpsv(layout, triangle, a_transpose, diagonal, - n, + static_cast<int>(n), reinterpret_cast<const float*>(&ap_buffer[ap_offset]), reinterpret_cast<float*>(&x_buffer[x_offset]), static_cast<int>(x_inc)); } @@ -1179,7 +1179,7 @@ void cblasXtpsv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS const std::vector<double2>& ap_buffer, const size_t ap_offset, std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc) { cblas_ztpsv(layout, triangle, a_transpose, diagonal, - n, + static_cast<int>(n), reinterpret_cast<const double*>(&ap_buffer[ap_offset]), reinterpret_cast<double*>(&x_buffer[x_offset]), static_cast<int>(x_inc)); } @@ -1192,7 +1192,7 @@ void cblasXger(const CBLAS_ORDER layout, const std::vector<float>& y_buffer, const size_t y_offset, const size_t y_inc, std::vector<float>& a_buffer, const size_t a_offset, const size_t a_ld) { cblas_sger(layout, - m, n, + static_cast<int>(m), static_cast<int>(n), alpha, &x_buffer[x_offset], static_cast<int>(x_inc), &y_buffer[y_offset], static_cast<int>(y_inc), @@ -1205,7 +1205,7 @@ void cblasXger(const CBLAS_ORDER layout, const std::vector<double>& y_buffer, 
const size_t y_offset, const size_t y_inc, std::vector<double>& a_buffer, const size_t a_offset, const size_t a_ld) { cblas_dger(layout, - m, n, + static_cast<int>(m), static_cast<int>(n), alpha, &x_buffer[x_offset], static_cast<int>(x_inc), &y_buffer[y_offset], static_cast<int>(y_inc), @@ -1238,7 +1238,7 @@ void cblasXgeru(const CBLAS_ORDER layout, std::vector<float2>& a_buffer, const size_t a_offset, const size_t a_ld) { const auto alpha_array = std::vector<float>{alpha.real(), alpha.imag()}; cblas_cgeru(layout, - m, n, + static_cast<int>(m), static_cast<int>(n), alpha_array.data(), reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc), reinterpret_cast<const float*>(&y_buffer[y_offset]), static_cast<int>(y_inc), @@ -1252,7 +1252,7 @@ void cblasXgeru(const CBLAS_ORDER layout, std::vector<double2>& a_buffer, const size_t a_offset, const size_t a_ld) { const auto alpha_array = std::vector<double>{alpha.real(), alpha.imag()}; cblas_zgeru(layout, - m, n, + static_cast<int>(m), static_cast<int>(n), alpha_array.data(), reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc), reinterpret_cast<const double*>(&y_buffer[y_offset]), static_cast<int>(y_inc), @@ -1268,7 +1268,7 @@ void cblasXgerc(const CBLAS_ORDER layout, std::vector<float2>& a_buffer, const size_t a_offset, const size_t a_ld) { const auto alpha_array = std::vector<float>{alpha.real(), alpha.imag()}; cblas_cgerc(layout, - m, n, + static_cast<int>(m), static_cast<int>(n), alpha_array.data(), reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc), reinterpret_cast<const float*>(&y_buffer[y_offset]), static_cast<int>(y_inc), @@ -1282,7 +1282,7 @@ void cblasXgerc(const CBLAS_ORDER layout, std::vector<double2>& a_buffer, const size_t a_offset, const size_t a_ld) { const auto alpha_array = std::vector<double>{alpha.real(), alpha.imag()}; cblas_zgerc(layout, - m, n, + static_cast<int>(m), static_cast<int>(n), alpha_array.data(), 
reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc), reinterpret_cast<const double*>(&y_buffer[y_offset]), static_cast<int>(y_inc), @@ -1296,7 +1296,7 @@ void cblasXher(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc, std::vector<float2>& a_buffer, const size_t a_offset, const size_t a_ld) { cblas_cher(layout, triangle, - n, + static_cast<int>(n), alpha, reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc), reinterpret_cast<float*>(&a_buffer[a_offset]), a_ld); @@ -1307,7 +1307,7 @@ void cblasXher(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc, std::vector<double2>& a_buffer, const size_t a_offset, const size_t a_ld) { cblas_zher(layout, triangle, - n, + static_cast<int>(n), alpha, reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc), reinterpret_cast<double*>(&a_buffer[a_offset]), a_ld); @@ -1320,7 +1320,7 @@ void cblasXhpr(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc, std::vector<float2>& ap_buffer, const size_t ap_offset) { cblas_chpr(layout, triangle, - n, + static_cast<int>(n), alpha, reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc), reinterpret_cast<float*>(&ap_buffer[ap_offset])); @@ -1331,7 +1331,7 @@ void cblasXhpr(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc, std::vector<double2>& ap_buffer, const size_t ap_offset) { cblas_zhpr(layout, triangle, - n, + static_cast<int>(n), alpha, reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc), reinterpret_cast<double*>(&ap_buffer[ap_offset])); @@ -1346,7 +1346,7 @@ void cblasXher2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, std::vector<float2>& 
a_buffer, const size_t a_offset, const size_t a_ld) { const auto alpha_array = std::vector<float>{alpha.real(), alpha.imag()}; cblas_cher2(layout, triangle, - n, + static_cast<int>(n), alpha_array.data(), reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc), reinterpret_cast<const float*>(&y_buffer[y_offset]), static_cast<int>(y_inc), @@ -1360,7 +1360,7 @@ void cblasXher2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, std::vector<double2>& a_buffer, const size_t a_offset, const size_t a_ld) { const auto alpha_array = std::vector<double>{alpha.real(), alpha.imag()}; cblas_zher2(layout, triangle, - n, + static_cast<int>(n), alpha_array.data(), reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc), reinterpret_cast<const double*>(&y_buffer[y_offset]), static_cast<int>(y_inc), @@ -1376,7 +1376,7 @@ void cblasXhpr2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, std::vector<float2>& ap_buffer, const size_t ap_offset) { const auto alpha_array = std::vector<float>{alpha.real(), alpha.imag()}; cblas_chpr2(layout, triangle, - n, + static_cast<int>(n), alpha_array.data(), reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc), reinterpret_cast<const float*>(&y_buffer[y_offset]), static_cast<int>(y_inc), @@ -1390,7 +1390,7 @@ void cblasXhpr2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, std::vector<double2>& ap_buffer, const size_t ap_offset) { const auto alpha_array = std::vector<double>{alpha.real(), alpha.imag()}; cblas_zhpr2(layout, triangle, - n, + static_cast<int>(n), alpha_array.data(), reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc), reinterpret_cast<const double*>(&y_buffer[y_offset]), static_cast<int>(y_inc), @@ -1404,7 +1404,7 @@ void cblasXsyr(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc, std::vector<float>& a_buffer, const size_t a_offset, const size_t a_ld) { 
cblas_ssyr(layout, triangle, - n, + static_cast<int>(n), alpha, &x_buffer[x_offset], static_cast<int>(x_inc), &a_buffer[a_offset], a_ld); @@ -1415,7 +1415,7 @@ void cblasXsyr(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc, std::vector<double>& a_buffer, const size_t a_offset, const size_t a_ld) { cblas_dsyr(layout, triangle, - n, + static_cast<int>(n), alpha, &x_buffer[x_offset], static_cast<int>(x_inc), &a_buffer[a_offset], a_ld); @@ -1442,7 +1442,7 @@ void cblasXspr(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc, std::vector<float>& ap_buffer, const size_t ap_offset) { cblas_sspr(layout, triangle, - n, + static_cast<int>(n), alpha, &x_buffer[x_offset], static_cast<int>(x_inc), &ap_buffer[ap_offset]); @@ -1453,7 +1453,7 @@ void cblasXspr(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc, std::vector<double>& ap_buffer, const size_t ap_offset) { cblas_dspr(layout, triangle, - n, + static_cast<int>(n), alpha, &x_buffer[x_offset], static_cast<int>(x_inc), &ap_buffer[ap_offset]); @@ -1481,7 +1481,7 @@ void cblasXsyr2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const std::vector<float>& y_buffer, const size_t y_offset, const size_t y_inc, std::vector<float>& a_buffer, const size_t a_offset, const size_t a_ld) { cblas_ssyr2(layout, triangle, - n, + static_cast<int>(n), alpha, &x_buffer[x_offset], static_cast<int>(x_inc), &y_buffer[y_offset], static_cast<int>(y_inc), @@ -1494,7 +1494,7 @@ void cblasXsyr2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const std::vector<double>& y_buffer, const size_t y_offset, const size_t y_inc, std::vector<double>& a_buffer, const size_t a_offset, const size_t a_ld) { cblas_dsyr2(layout, triangle, - n, + static_cast<int>(n), alpha, &x_buffer[x_offset], 
static_cast<int>(x_inc), &y_buffer[y_offset], static_cast<int>(y_inc), @@ -1526,7 +1526,7 @@ void cblasXspr2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const std::vector<float>& y_buffer, const size_t y_offset, const size_t y_inc, std::vector<float>& ap_buffer, const size_t ap_offset) { cblas_sspr2(layout, triangle, - n, + static_cast<int>(n), alpha, &x_buffer[x_offset], static_cast<int>(x_inc), &y_buffer[y_offset], static_cast<int>(y_inc), @@ -1539,7 +1539,7 @@ void cblasXspr2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const std::vector<double>& y_buffer, const size_t y_offset, const size_t y_inc, std::vector<double>& ap_buffer, const size_t ap_offset) { cblas_dspr2(layout, triangle, - n, + static_cast<int>(n), alpha, &x_buffer[x_offset], static_cast<int>(x_inc), &y_buffer[y_offset], static_cast<int>(y_inc), @@ -1576,7 +1576,7 @@ void cblasXgemm(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose, con const float beta, std::vector<float>& c_buffer, const size_t c_offset, const size_t c_ld) { cblas_sgemm(layout, a_transpose, b_transpose, - m, n, k, + static_cast<int>(m), static_cast<int>(n), static_cast<int>(k), alpha, &a_buffer[a_offset], a_ld, &b_buffer[b_offset], b_ld, @@ -1591,7 +1591,7 @@ void cblasXgemm(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose, con const double beta, std::vector<double>& c_buffer, const size_t c_offset, const size_t c_ld) { cblas_dgemm(layout, a_transpose, b_transpose, - m, n, k, + static_cast<int>(m), static_cast<int>(n), static_cast<int>(k), alpha, &a_buffer[a_offset], a_ld, &b_buffer[b_offset], b_ld, @@ -1608,7 +1608,7 @@ void cblasXgemm(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose, con const auto alpha_array = std::vector<float>{alpha.real(), alpha.imag()}; const auto beta_array = std::vector<float>{beta.real(), beta.imag()}; cblas_cgemm(layout, a_transpose, b_transpose, - m, n, k, + static_cast<int>(m), static_cast<int>(n), static_cast<int>(k), alpha_array.data(), 
reinterpret_cast<const float*>(&a_buffer[a_offset]), a_ld, reinterpret_cast<const float*>(&b_buffer[b_offset]), b_ld, @@ -1625,7 +1625,7 @@ void cblasXgemm(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose, con const auto alpha_array = std::vector<double>{alpha.real(), alpha.imag()}; const auto beta_array = std::vector<double>{beta.real(), beta.imag()}; cblas_zgemm(layout, a_transpose, b_transpose, - m, n, k, + static_cast<int>(m), static_cast<int>(n), static_cast<int>(k), alpha_array.data(), reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld, reinterpret_cast<const double*>(&b_buffer[b_offset]), b_ld, @@ -1661,7 +1661,7 @@ void cblasXsymm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL const float beta, std::vector<float>& c_buffer, const size_t c_offset, const size_t c_ld) { cblas_ssymm(layout, side, triangle, - m, n, + static_cast<int>(m), static_cast<int>(n), alpha, &a_buffer[a_offset], a_ld, &b_buffer[b_offset], b_ld, @@ -1676,7 +1676,7 @@ void cblasXsymm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL const double beta, std::vector<double>& c_buffer, const size_t c_offset, const size_t c_ld) { cblas_dsymm(layout, side, triangle, - m, n, + static_cast<int>(m), static_cast<int>(n), alpha, &a_buffer[a_offset], a_ld, &b_buffer[b_offset], b_ld, @@ -1693,7 +1693,7 @@ void cblasXsymm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL const auto alpha_array = std::vector<float>{alpha.real(), alpha.imag()}; const auto beta_array = std::vector<float>{beta.real(), beta.imag()}; cblas_csymm(layout, side, triangle, - m, n, + static_cast<int>(m), static_cast<int>(n), alpha_array.data(), reinterpret_cast<const float*>(&a_buffer[a_offset]), a_ld, reinterpret_cast<const float*>(&b_buffer[b_offset]), b_ld, @@ -1710,7 +1710,7 @@ void cblasXsymm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL const auto alpha_array = std::vector<double>{alpha.real(), alpha.imag()}; const auto beta_array = 
std::vector<double>{beta.real(), beta.imag()}; cblas_zsymm(layout, side, triangle, - m, n, + static_cast<int>(m), static_cast<int>(n), alpha_array.data(), reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld, reinterpret_cast<const double*>(&b_buffer[b_offset]), b_ld, @@ -1748,7 +1748,7 @@ void cblasXhemm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL const auto alpha_array = std::vector<float>{alpha.real(), alpha.imag()}; const auto beta_array = std::vector<float>{beta.real(), beta.imag()}; cblas_chemm(layout, side, triangle, - m, n, + static_cast<int>(m), static_cast<int>(n), alpha_array.data(), reinterpret_cast<const float*>(&a_buffer[a_offset]), a_ld, reinterpret_cast<const float*>(&b_buffer[b_offset]), b_ld, @@ -1765,7 +1765,7 @@ void cblasXhemm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL const auto alpha_array = std::vector<double>{alpha.real(), alpha.imag()}; const auto beta_array = std::vector<double>{beta.real(), beta.imag()}; cblas_zhemm(layout, side, triangle, - m, n, + static_cast<int>(m), static_cast<int>(n), alpha_array.data(), reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld, reinterpret_cast<const double*>(&b_buffer[b_offset]), b_ld, @@ -1781,7 +1781,7 @@ void cblasXsyrk(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS const float beta, std::vector<float>& c_buffer, const size_t c_offset, const size_t c_ld) { cblas_ssyrk(layout, triangle, a_transpose, - n, k, + static_cast<int>(n), static_cast<int>(k), alpha, &a_buffer[a_offset], a_ld, beta, @@ -1794,7 +1794,7 @@ void cblasXsyrk(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS const double beta, std::vector<double>& c_buffer, const size_t c_offset, const size_t c_ld) { cblas_dsyrk(layout, triangle, a_transpose, - n, k, + static_cast<int>(n), static_cast<int>(k), alpha, &a_buffer[a_offset], a_ld, beta, @@ -1809,7 +1809,7 @@ void cblasXsyrk(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS const auto 
alpha_array = std::vector<float>{alpha.real(), alpha.imag()}; const auto beta_array = std::vector<float>{beta.real(), beta.imag()}; cblas_csyrk(layout, triangle, a_transpose, - n, k, + static_cast<int>(n), static_cast<int>(k), alpha_array.data(), reinterpret_cast<const float*>(&a_buffer[a_offset]), a_ld, beta_array.data(), @@ -1824,7 +1824,7 @@ void cblasXsyrk(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS const auto alpha_array = std::vector<double>{alpha.real(), alpha.imag()}; const auto beta_array = std::vector<double>{beta.real(), beta.imag()}; cblas_zsyrk(layout, triangle, a_transpose, - n, k, + static_cast<int>(n), static_cast<int>(k), alpha_array.data(), reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld, beta_array.data(), @@ -1855,7 +1855,7 @@ void cblasXherk(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS const float beta, std::vector<float2>& c_buffer, const size_t c_offset, const size_t c_ld) { cblas_cherk(layout, triangle, a_transpose, - n, k, + static_cast<int>(n), static_cast<int>(k), alpha, reinterpret_cast<const float*>(&a_buffer[a_offset]), a_ld, beta, @@ -1868,7 +1868,7 @@ void cblasXherk(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS const double beta, std::vector<double2>& c_buffer, const size_t c_offset, const size_t c_ld) { cblas_zherk(layout, triangle, a_transpose, - n, k, + static_cast<int>(n), static_cast<int>(k), alpha, reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld, beta, @@ -1884,7 +1884,7 @@ void cblasXsyr2k(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLA const float beta, std::vector<float>& c_buffer, const size_t c_offset, const size_t c_ld) { cblas_ssyr2k(layout, triangle, ab_transpose, - n, k, + static_cast<int>(n), static_cast<int>(k), alpha, &a_buffer[a_offset], a_ld, &b_buffer[b_offset], b_ld, @@ -1899,7 +1899,7 @@ void cblasXsyr2k(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLA const double beta, std::vector<double>& 
c_buffer, const size_t c_offset, const size_t c_ld) { cblas_dsyr2k(layout, triangle, ab_transpose, - n, k, + static_cast<int>(n), static_cast<int>(k), alpha, &a_buffer[a_offset], a_ld, &b_buffer[b_offset], b_ld, @@ -1916,7 +1916,7 @@ void cblasXsyr2k(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLA const auto alpha_array = std::vector<float>{alpha.real(), alpha.imag()}; const auto beta_array = std::vector<float>{beta.real(), beta.imag()}; cblas_csyr2k(layout, triangle, ab_transpose, - n, k, + static_cast<int>(n), static_cast<int>(k), alpha_array.data(), reinterpret_cast<const float*>(&a_buffer[a_offset]), a_ld, reinterpret_cast<const float*>(&b_buffer[b_offset]), b_ld, @@ -1933,7 +1933,7 @@ void cblasXsyr2k(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLA const auto alpha_array = std::vector<double>{alpha.real(), alpha.imag()}; const auto beta_array = std::vector<double>{beta.real(), beta.imag()}; cblas_zsyr2k(layout, triangle, ab_transpose, - n, k, + static_cast<int>(n), static_cast<int>(k), alpha_array.data(), reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld, reinterpret_cast<const double*>(&b_buffer[b_offset]), b_ld, @@ -1970,7 +1970,7 @@ void cblasXher2k(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLA std::vector<float2>& c_buffer, const size_t c_offset, const size_t c_ld) { const auto alpha_array = std::vector<float>{alpha.real(), alpha.imag()}; cblas_cher2k(layout, triangle, ab_transpose, - n, k, + static_cast<int>(n), static_cast<int>(k), alpha_array.data(), reinterpret_cast<const float*>(&a_buffer[a_offset]), a_ld, reinterpret_cast<const float*>(&b_buffer[b_offset]), b_ld, @@ -1986,7 +1986,7 @@ void cblasXher2k(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLA std::vector<double2>& c_buffer, const size_t c_offset, const size_t c_ld) { const auto alpha_array = std::vector<double>{alpha.real(), alpha.imag()}; cblas_zher2k(layout, triangle, ab_transpose, - n, k, + static_cast<int>(n), 
static_cast<int>(k), alpha_array.data(), reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld, reinterpret_cast<const double*>(&b_buffer[b_offset]), b_ld, @@ -2001,7 +2001,7 @@ void cblasXtrmm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL const std::vector<float>& a_buffer, const size_t a_offset, const size_t a_ld, std::vector<float>& b_buffer, const size_t b_offset, const size_t b_ld) { cblas_strmm(layout, side, triangle, a_transpose, diagonal, - m, n, + static_cast<int>(m), static_cast<int>(n), alpha, &a_buffer[a_offset], a_ld, &b_buffer[b_offset], b_ld); @@ -2012,7 +2012,7 @@ void cblasXtrmm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL const std::vector<double>& a_buffer, const size_t a_offset, const size_t a_ld, std::vector<double>& b_buffer, const size_t b_offset, const size_t b_ld) { cblas_dtrmm(layout, side, triangle, a_transpose, diagonal, - m, n, + static_cast<int>(m), static_cast<int>(n), alpha, &a_buffer[a_offset], a_ld, &b_buffer[b_offset], b_ld); @@ -2024,7 +2024,7 @@ void cblasXtrmm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL std::vector<float2>& b_buffer, const size_t b_offset, const size_t b_ld) { const auto alpha_array = std::vector<float>{alpha.real(), alpha.imag()}; cblas_ctrmm(layout, side, triangle, a_transpose, diagonal, - m, n, + static_cast<int>(m), static_cast<int>(n), alpha_array.data(), reinterpret_cast<const float*>(&a_buffer[a_offset]), a_ld, reinterpret_cast<float*>(&b_buffer[b_offset]), b_ld); @@ -2036,7 +2036,7 @@ void cblasXtrmm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL std::vector<double2>& b_buffer, const size_t b_offset, const size_t b_ld) { const auto alpha_array = std::vector<double>{alpha.real(), alpha.imag()}; cblas_ztrmm(layout, side, triangle, a_transpose, diagonal, - m, n, + static_cast<int>(m), static_cast<int>(n), alpha_array.data(), reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld, 
reinterpret_cast<double*>(&b_buffer[b_offset]), b_ld); @@ -2063,7 +2063,7 @@ void cblasXtrsm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL const std::vector<float>& a_buffer, const size_t a_offset, const size_t a_ld, std::vector<float>& b_buffer, const size_t b_offset, const size_t b_ld) { cblas_strsm(layout, side, triangle, a_transpose, diagonal, - m, n, + static_cast<int>(m), static_cast<int>(n), alpha, &a_buffer[a_offset], a_ld, &b_buffer[b_offset], b_ld); @@ -2074,7 +2074,7 @@ void cblasXtrsm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL const std::vector<double>& a_buffer, const size_t a_offset, const size_t a_ld, std::vector<double>& b_buffer, const size_t b_offset, const size_t b_ld) { cblas_dtrsm(layout, side, triangle, a_transpose, diagonal, - m, n, + static_cast<int>(m), static_cast<int>(n), alpha, &a_buffer[a_offset], a_ld, &b_buffer[b_offset], b_ld); @@ -2086,7 +2086,7 @@ void cblasXtrsm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL std::vector<float2>& b_buffer, const size_t b_offset, const size_t b_ld) { const auto alpha_array = std::vector<float>{alpha.real(), alpha.imag()}; cblas_ctrsm(layout, side, triangle, a_transpose, diagonal, - m, n, + static_cast<int>(m), static_cast<int>(n), alpha_array.data(), reinterpret_cast<const float*>(&a_buffer[a_offset]), a_ld, reinterpret_cast<float*>(&b_buffer[b_offset]), b_ld); @@ -2098,7 +2098,7 @@ void cblasXtrsm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL std::vector<double2>& b_buffer, const size_t b_offset, const size_t b_ld) { const auto alpha_array = std::vector<double>{alpha.real(), alpha.imag()}; cblas_ztrsm(layout, side, triangle, a_transpose, diagonal, - m, n, + static_cast<int>(m), static_cast<int>(n), alpha_array.data(), reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld, reinterpret_cast<double*>(&b_buffer[b_offset]), b_ld); diff --git a/test/wrapper_cublas.hpp b/test/wrapper_cublas.hpp new file mode 100644 index 
00000000..22eb3971 --- /dev/null +++ b/test/wrapper_cublas.hpp @@ -0,0 +1,2682 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements a wrapper around the cuBLAS library, such that its routines can be called +// in a similar way as the CLBlast routines: using alpha and beta to determine the precision. +// +// ================================================================================================= + +#ifndef CLBLAST_TEST_WRAPPER_CUBLAS_H_ +#define CLBLAST_TEST_WRAPPER_CUBLAS_H_ + +#include <cuda_runtime.h> +#include <cublas_v2.h> + +#include "utilities/utilities.hpp" + +namespace clblast { + +// Conversions from CLBlast types +cublasOperation_t convertToCUBLAS(const Transpose v) { return (v == Transpose::kNo) ? CUBLAS_OP_N : (v == Transpose::kYes) ? CUBLAS_OP_T : CUBLAS_OP_C; } +cublasFillMode_t convertToCUBLAS(const Triangle v) { return (v == Triangle::kUpper) ? CUBLAS_FILL_MODE_UPPER : CUBLAS_FILL_MODE_LOWER; } +cublasDiagType_t convertToCUBLAS(const Diagonal v) { return (v == Diagonal::kUnit) ? CUBLAS_DIAG_UNIT : CUBLAS_DIAG_NON_UNIT; } +cublasSideMode_t convertToCUBLAS(const Side v) { return (v == Side::kLeft) ? 
CUBLAS_SIDE_LEFT : CUBLAS_SIDE_RIGHT; } + +// ================================================================================================= +// BLAS level-1 (vector-vector) routines +// ================================================================================================= + +// Forwards the cuBLAS calls for SROTG/DROTG +template <typename T> +cublasStatus_t cublasXrotg(T* sa_buffer, const size_t sa_offset, + T* sb_buffer, const size_t sb_offset, + T* sc_buffer, const size_t sc_offset, + T* ss_buffer, const size_t ss_offset); +template <> +cublasStatus_t cublasXrotg<float>(float* sa_buffer, const size_t sa_offset, + float* sb_buffer, const size_t sb_offset, + float* sc_buffer, const size_t sc_offset, + float* ss_buffer, const size_t ss_offset) { + cublasHandle_t handle; + auto status = cublasSrotg(handle, &sa_buffer[sa_offset], + &sb_buffer[sb_offset], + &sc_buffer[sc_offset], + &ss_buffer[ss_offset]); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXrotg<double>(double* sa_buffer, const size_t sa_offset, + double* sb_buffer, const size_t sb_offset, + double* sc_buffer, const size_t sc_offset, + double* ss_buffer, const size_t ss_offset) { + cublasHandle_t handle; + auto status = cublasDrotg(handle, &sa_buffer[sa_offset], + &sb_buffer[sb_offset], + &sc_buffer[sc_offset], + &ss_buffer[ss_offset]); + cublasDestroy(handle); + return status; +} + +// Forwards the cuBLAS calls for SROTMG/DROTMG +template <typename T> +cublasStatus_t cublasXrotmg(T* sd1_buffer, const size_t sd1_offset, + T* sd2_buffer, const size_t sd2_offset, + T* sx1_buffer, const size_t sx1_offset, + const T* sy1_buffer, const size_t sy1_offset, + T* sparam_buffer, const size_t sparam_offset); +template <> +cublasStatus_t cublasXrotmg<float>(float* sd1_buffer, const size_t sd1_offset, + float* sd2_buffer, const size_t sd2_offset, + float* sx1_buffer, const size_t sx1_offset, + const float* sy1_buffer, const size_t sy1_offset, + float* sparam_buffer, const 
size_t sparam_offset) { + cublasHandle_t handle; + auto status = cublasSrotmg(handle, &sd1_buffer[sd1_offset], + &sd2_buffer[sd2_offset], + &sx1_buffer[sx1_offset], + &sy1_buffer[sy1_offset], + &sparam_buffer[sparam_offset]); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXrotmg<double>(double* sd1_buffer, const size_t sd1_offset, + double* sd2_buffer, const size_t sd2_offset, + double* sx1_buffer, const size_t sx1_offset, + const double* sy1_buffer, const size_t sy1_offset, + double* sparam_buffer, const size_t sparam_offset) { + cublasHandle_t handle; + auto status = cublasDrotmg(handle, &sd1_buffer[sd1_offset], + &sd2_buffer[sd2_offset], + &sx1_buffer[sx1_offset], + &sy1_buffer[sy1_offset], + &sparam_buffer[sparam_offset]); + cublasDestroy(handle); + return status; +} + +// Forwards the cuBLAS calls for SROT/DROT +cublasStatus_t cublasXrot(const size_t n, + float* x_buffer, const size_t x_offset, const size_t x_inc, + float* y_buffer, const size_t y_offset, const size_t y_inc, + const float cos, + const float sin) { + cublasHandle_t handle; + auto status = cublasSrot(handle, static_cast<int>(n), + &x_buffer[x_offset], static_cast<int>(x_inc), + &y_buffer[y_offset], static_cast<int>(y_inc), + &cos, + &sin); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXrot(const size_t n, + double* x_buffer, const size_t x_offset, const size_t x_inc, + double* y_buffer, const size_t y_offset, const size_t y_inc, + const double cos, + const double sin) { + cublasHandle_t handle; + auto status = cublasDrot(handle, static_cast<int>(n), + &x_buffer[x_offset], static_cast<int>(x_inc), + &y_buffer[y_offset], static_cast<int>(y_inc), + &cos, + &sin); + cublasDestroy(handle); + return status; +} + +// Forwards the cuBLAS calls for SROTM/DROTM +template <typename T> +cublasStatus_t cublasXrotm(const size_t n, + T* x_buffer, const size_t x_offset, const size_t x_inc, + T* y_buffer, const size_t y_offset, const size_t y_inc, + T* 
sparam_buffer, const size_t sparam_offset); +template <> +cublasStatus_t cublasXrotm<float>(const size_t n, + float* x_buffer, const size_t x_offset, const size_t x_inc, + float* y_buffer, const size_t y_offset, const size_t y_inc, + float* sparam_buffer, const size_t sparam_offset) { + cublasHandle_t handle; + auto status = cublasSrotm(handle, static_cast<int>(n), + &x_buffer[x_offset], static_cast<int>(x_inc), + &y_buffer[y_offset], static_cast<int>(y_inc), + &sparam_buffer[sparam_offset]); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXrotm<double>(const size_t n, + double* x_buffer, const size_t x_offset, const size_t x_inc, + double* y_buffer, const size_t y_offset, const size_t y_inc, + double* sparam_buffer, const size_t sparam_offset) { + cublasHandle_t handle; + auto status = cublasDrotm(handle, static_cast<int>(n), + &x_buffer[x_offset], static_cast<int>(x_inc), + &y_buffer[y_offset], static_cast<int>(y_inc), + &sparam_buffer[sparam_offset]); + cublasDestroy(handle); + return status; +} + +// Forwards the cuBLAS calls for SSWAP/DSWAP/CSWAP/ZSWAP +template <typename T> +cublasStatus_t cublasXswap(const size_t n, + T* x_buffer, const size_t x_offset, const size_t x_inc, + T* y_buffer, const size_t y_offset, const size_t y_inc); +template <> +cublasStatus_t cublasXswap<float>(const size_t n, + float* x_buffer, const size_t x_offset, const size_t x_inc, + float* y_buffer, const size_t y_offset, const size_t y_inc) { + cublasHandle_t handle; + auto status = cublasSswap(handle, static_cast<int>(n), + &x_buffer[x_offset], static_cast<int>(x_inc), + &y_buffer[y_offset], static_cast<int>(y_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXswap<double>(const size_t n, + double* x_buffer, const size_t x_offset, const size_t x_inc, + double* y_buffer, const size_t y_offset, const size_t y_inc) { + cublasHandle_t handle; + auto status = cublasDswap(handle, static_cast<int>(n), + 
&x_buffer[x_offset], static_cast<int>(x_inc), + &y_buffer[y_offset], static_cast<int>(y_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXswap<float2>(const size_t n, + float2* x_buffer, const size_t x_offset, const size_t x_inc, + float2* y_buffer, const size_t y_offset, const size_t y_inc) { + cublasHandle_t handle; + auto status = cublasCswap(handle, static_cast<int>(n), + reinterpret_cast<cuComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc), + reinterpret_cast<cuComplex*>(&y_buffer[y_offset]), static_cast<int>(y_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXswap<double2>(const size_t n, + double2* x_buffer, const size_t x_offset, const size_t x_inc, + double2* y_buffer, const size_t y_offset, const size_t y_inc) { + cublasHandle_t handle; + auto status = cublasZswap(handle, static_cast<int>(n), + reinterpret_cast<cuDoubleComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc), + reinterpret_cast<cuDoubleComplex*>(&y_buffer[y_offset]), static_cast<int>(y_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXswap<half>(const size_t n, + half* x_buffer, const size_t x_offset, const size_t x_inc, + half* y_buffer, const size_t y_offset, const size_t y_inc) { + return CUBLAS_STATUS_NOT_SUPPORTED; +} + +// Forwards the cuBLAS calls for SSCAL/DSCAL/CSCAL/ZSCAL +cublasStatus_t cublasXscal(const size_t n, + const float alpha, + float* x_buffer, const size_t x_offset, const size_t x_inc) { + cublasHandle_t handle; + auto status = cublasSscal(handle, static_cast<int>(n), + &alpha, + &x_buffer[x_offset], static_cast<int>(x_inc)); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXscal(const size_t n, + const double alpha, + double* x_buffer, const size_t x_offset, const size_t x_inc) { + cublasHandle_t handle; + auto status = cublasDscal(handle, static_cast<int>(n), + &alpha, + &x_buffer[x_offset], static_cast<int>(x_inc)); + 
cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXscal(const size_t n, + const float2 alpha, + float2* x_buffer, const size_t x_offset, const size_t x_inc) { + cuComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); + cublasHandle_t handle; + auto status = cublasCscal(handle, static_cast<int>(n), + &alpha_cuda, + reinterpret_cast<cuComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc)); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXscal(const size_t n, + const double2 alpha, + double2* x_buffer, const size_t x_offset, const size_t x_inc) { + cuDoubleComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); + cublasHandle_t handle; + auto status = cublasZscal(handle, static_cast<int>(n), + &alpha_cuda, + reinterpret_cast<cuDoubleComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc)); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXscal(const size_t n, + const half alpha, + half* x_buffer, const size_t x_offset, const size_t x_inc) { + return CUBLAS_STATUS_NOT_SUPPORTED; +} + +// Forwards the cuBLAS calls for SCOPY/DCOPY/CCOPY/ZCOPY +template <typename T> +cublasStatus_t cublasXcopy(const size_t n, + const T* x_buffer, const size_t x_offset, const size_t x_inc, + T* y_buffer, const size_t y_offset, const size_t y_inc); +template <> +cublasStatus_t cublasXcopy<float>(const size_t n, + const float* x_buffer, const size_t x_offset, const size_t x_inc, + float* y_buffer, const size_t y_offset, const size_t y_inc) { + cublasHandle_t handle; + auto status = cublasScopy(handle, static_cast<int>(n), + &x_buffer[x_offset], static_cast<int>(x_inc), + &y_buffer[y_offset], static_cast<int>(y_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXcopy<double>(const size_t n, + const double* x_buffer, const size_t x_offset, const size_t x_inc, + double* y_buffer, const size_t y_offset, const size_t y_inc) { + cublasHandle_t handle; + 
auto status = cublasDcopy(handle, static_cast<int>(n), + &x_buffer[x_offset], static_cast<int>(x_inc), + &y_buffer[y_offset], static_cast<int>(y_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXcopy<float2>(const size_t n, + const float2* x_buffer, const size_t x_offset, const size_t x_inc, + float2* y_buffer, const size_t y_offset, const size_t y_inc) { + cublasHandle_t handle; + auto status = cublasCcopy(handle, static_cast<int>(n), + reinterpret_cast<const cuComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc), + reinterpret_cast<cuComplex*>(&y_buffer[y_offset]), static_cast<int>(y_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXcopy<double2>(const size_t n, + const double2* x_buffer, const size_t x_offset, const size_t x_inc, + double2* y_buffer, const size_t y_offset, const size_t y_inc) { + cublasHandle_t handle; + auto status = cublasZcopy(handle, static_cast<int>(n), + reinterpret_cast<const cuDoubleComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc), + reinterpret_cast<cuDoubleComplex*>(&y_buffer[y_offset]), static_cast<int>(y_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXcopy<half>(const size_t n, + const half* x_buffer, const size_t x_offset, const size_t x_inc, + half* y_buffer, const size_t y_offset, const size_t y_inc) { + return CUBLAS_STATUS_NOT_SUPPORTED; +} + +// Forwards the cuBLAS calls for SAXPY/DAXPY/CAXPY/ZAXPY +cublasStatus_t cublasXaxpy(const size_t n, + const float alpha, + const float* x_buffer, const size_t x_offset, const size_t x_inc, + float* y_buffer, const size_t y_offset, const size_t y_inc) { + cublasHandle_t handle; + auto status = cublasSaxpy(handle, static_cast<int>(n), + &alpha, + &x_buffer[x_offset], static_cast<int>(x_inc), + &y_buffer[y_offset], static_cast<int>(y_inc)); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXaxpy(const size_t n, + const double alpha, + const double* 
x_buffer, const size_t x_offset, const size_t x_inc, + double* y_buffer, const size_t y_offset, const size_t y_inc) { + cublasHandle_t handle; + auto status = cublasDaxpy(handle, static_cast<int>(n), + &alpha, + &x_buffer[x_offset], static_cast<int>(x_inc), + &y_buffer[y_offset], static_cast<int>(y_inc)); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXaxpy(const size_t n, + const float2 alpha, + const float2* x_buffer, const size_t x_offset, const size_t x_inc, + float2* y_buffer, const size_t y_offset, const size_t y_inc) { + cuComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); + cublasHandle_t handle; + auto status = cublasCaxpy(handle, static_cast<int>(n), + &alpha_cuda, + reinterpret_cast<const cuComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc), + reinterpret_cast<cuComplex*>(&y_buffer[y_offset]), static_cast<int>(y_inc)); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXaxpy(const size_t n, + const double2 alpha, + const double2* x_buffer, const size_t x_offset, const size_t x_inc, + double2* y_buffer, const size_t y_offset, const size_t y_inc) { + cuDoubleComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); + cublasHandle_t handle; + auto status = cublasZaxpy(handle, static_cast<int>(n), + &alpha_cuda, + reinterpret_cast<const cuDoubleComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc), + reinterpret_cast<cuDoubleComplex*>(&y_buffer[y_offset]), static_cast<int>(y_inc)); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXaxpy(const size_t n, + const half alpha, + const half* x_buffer, const size_t x_offset, const size_t x_inc, + half* y_buffer, const size_t y_offset, const size_t y_inc) { + return CUBLAS_STATUS_NOT_SUPPORTED; +} + +// Forwards the cuBLAS calls for SDOT/DDOT +template <typename T> +cublasStatus_t cublasXdot(const size_t n, + T* dot_buffer, const size_t dot_offset, + const T* x_buffer, const size_t x_offset, const 
size_t x_inc, + const T* y_buffer, const size_t y_offset, const size_t y_inc); +template <> +cublasStatus_t cublasXdot<float>(const size_t n, + float* dot_buffer, const size_t dot_offset, + const float* x_buffer, const size_t x_offset, const size_t x_inc, + const float* y_buffer, const size_t y_offset, const size_t y_inc) { + cublasHandle_t handle; + auto status = cublasSdot(handle, static_cast<int>(n), + &x_buffer[x_offset], static_cast<int>(x_inc), + &y_buffer[y_offset], static_cast<int>(y_inc), + &dot_buffer[dot_offset]); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXdot<double>(const size_t n, + double* dot_buffer, const size_t dot_offset, + const double* x_buffer, const size_t x_offset, const size_t x_inc, + const double* y_buffer, const size_t y_offset, const size_t y_inc) { + cublasHandle_t handle; + auto status = cublasDdot(handle, static_cast<int>(n), + &x_buffer[x_offset], static_cast<int>(x_inc), + &y_buffer[y_offset], static_cast<int>(y_inc), + &dot_buffer[dot_offset]); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXdot<half>(const size_t n, + half* dot_buffer, const size_t dot_offset, + const half* x_buffer, const size_t x_offset, const size_t x_inc, + const half* y_buffer, const size_t y_offset, const size_t y_inc) { + return CUBLAS_STATUS_NOT_SUPPORTED; +} + +// Forwards the cuBLAS calls for CDOTU/ZDOTU +template <typename T> +cublasStatus_t cublasXdotu(const size_t n, + T* dot_buffer, const size_t dot_offset, + const T* x_buffer, const size_t x_offset, const size_t x_inc, + const T* y_buffer, const size_t y_offset, const size_t y_inc); +template <> +cublasStatus_t cublasXdotu<float2>(const size_t n, + float2* dot_buffer, const size_t dot_offset, + const float2* x_buffer, const size_t x_offset, const size_t x_inc, + const float2* y_buffer, const size_t y_offset, const size_t y_inc) { + cublasHandle_t handle; + auto status = cublasCdotu(handle, static_cast<int>(n), + 
reinterpret_cast<const cuComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc), + reinterpret_cast<const cuComplex*>(&y_buffer[y_offset]), static_cast<int>(y_inc), + reinterpret_cast<cuComplex*>(&dot_buffer[dot_offset])); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXdotu<double2>(const size_t n, + double2* dot_buffer, const size_t dot_offset, + const double2* x_buffer, const size_t x_offset, const size_t x_inc, + const double2* y_buffer, const size_t y_offset, const size_t y_inc) { + cublasHandle_t handle; + auto status = cublasZdotu(handle, static_cast<int>(n), + reinterpret_cast<const cuDoubleComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc), + reinterpret_cast<const cuDoubleComplex*>(&y_buffer[y_offset]), static_cast<int>(y_inc), + reinterpret_cast<cuDoubleComplex*>(&dot_buffer[dot_offset])); + cublasDestroy(handle); + return status; +} + +// Forwards the cuBLAS calls for CDOTC/ZDOTC +template <typename T> +cublasStatus_t cublasXdotc(const size_t n, + T* dot_buffer, const size_t dot_offset, + const T* x_buffer, const size_t x_offset, const size_t x_inc, + const T* y_buffer, const size_t y_offset, const size_t y_inc); +template <> +cublasStatus_t cublasXdotc<float2>(const size_t n, + float2* dot_buffer, const size_t dot_offset, + const float2* x_buffer, const size_t x_offset, const size_t x_inc, + const float2* y_buffer, const size_t y_offset, const size_t y_inc) { + cublasHandle_t handle; + auto status = cublasCdotc(handle, static_cast<int>(n), + reinterpret_cast<const cuComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc), + reinterpret_cast<const cuComplex*>(&y_buffer[y_offset]), static_cast<int>(y_inc), + reinterpret_cast<cuComplex*>(&dot_buffer[dot_offset])); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXdotc<double2>(const size_t n, + double2* dot_buffer, const size_t dot_offset, + const double2* x_buffer, const size_t x_offset, const size_t x_inc, + const double2* y_buffer, 
const size_t y_offset, const size_t y_inc) { + cublasHandle_t handle; + auto status = cublasZdotc(handle, static_cast<int>(n), + reinterpret_cast<const cuDoubleComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc), + reinterpret_cast<const cuDoubleComplex*>(&y_buffer[y_offset]), static_cast<int>(y_inc), + reinterpret_cast<cuDoubleComplex*>(&dot_buffer[dot_offset])); + cublasDestroy(handle); + return status; +} + +// Forwards the cuBLAS calls for SNRM2/DNRM2/ScNRM2/DzNRM2 +template <typename T> +cublasStatus_t cublasXnrm2(const size_t n, + T* nrm2_buffer, const size_t nrm2_offset, + const T* x_buffer, const size_t x_offset, const size_t x_inc); +template <> +cublasStatus_t cublasXnrm2<float>(const size_t n, + float* nrm2_buffer, const size_t nrm2_offset, + const float* x_buffer, const size_t x_offset, const size_t x_inc) { + cublasHandle_t handle; + auto status = cublasSnrm2(handle, static_cast<int>(n), + &x_buffer[x_offset], static_cast<int>(x_inc), + &nrm2_buffer[nrm2_offset]); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXnrm2<double>(const size_t n, + double* nrm2_buffer, const size_t nrm2_offset, + const double* x_buffer, const size_t x_offset, const size_t x_inc) { + cublasHandle_t handle; + auto status = cublasDnrm2(handle, static_cast<int>(n), + &x_buffer[x_offset], static_cast<int>(x_inc), + &nrm2_buffer[nrm2_offset]); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXnrm2<float2>(const size_t n, + float2* nrm2_buffer, const size_t nrm2_offset, + const float2* x_buffer, const size_t x_offset, const size_t x_inc) { + cublasHandle_t handle; + auto status = cublasScnrm2(handle, static_cast<int>(n), + reinterpret_cast<const cuComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc), + reinterpret_cast<cuComplex*>(&nrm2_buffer[nrm2_offset])); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXnrm2<double2>(const size_t n, + double2* nrm2_buffer, const size_t 
nrm2_offset, + const double2* x_buffer, const size_t x_offset, const size_t x_inc) { + cublasHandle_t handle; + auto status = cublasDznrm2(handle, static_cast<int>(n), + reinterpret_cast<const cuDoubleComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc), + reinterpret_cast<cuDoubleComplex*>(&nrm2_buffer[nrm2_offset])); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXnrm2<half>(const size_t n, + half* nrm2_buffer, const size_t nrm2_offset, + const half* x_buffer, const size_t x_offset, const size_t x_inc) { + return CUBLAS_STATUS_NOT_SUPPORTED; +} + +// Forwards the cuBLAS calls for SASUM/DASUM/ScASUM/DzASUM +template <typename T> +cublasStatus_t cublasXasum(const size_t n, + T* asum_buffer, const size_t asum_offset, + const T* x_buffer, const size_t x_offset, const size_t x_inc); +template <> +cublasStatus_t cublasXasum<float>(const size_t n, + float* asum_buffer, const size_t asum_offset, + const float* x_buffer, const size_t x_offset, const size_t x_inc) { + cublasHandle_t handle; + auto status = cublasSasum(handle, static_cast<int>(n), + &x_buffer[x_offset], static_cast<int>(x_inc), + &asum_buffer[asum_offset]); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXasum<double>(const size_t n, + double* asum_buffer, const size_t asum_offset, + const double* x_buffer, const size_t x_offset, const size_t x_inc) { + cublasHandle_t handle; + auto status = cublasDasum(handle, static_cast<int>(n), + &x_buffer[x_offset], static_cast<int>(x_inc), + &asum_buffer[asum_offset]); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXasum<float2>(const size_t n, + float2* asum_buffer, const size_t asum_offset, + const float2* x_buffer, const size_t x_offset, const size_t x_inc) { + cublasHandle_t handle; + auto status = cublasScasum(handle, static_cast<int>(n), + reinterpret_cast<const cuComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc), + 
reinterpret_cast<cuComplex*>(&asum_buffer[asum_offset])); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXasum<double2>(const size_t n, + double2* asum_buffer, const size_t asum_offset, + const double2* x_buffer, const size_t x_offset, const size_t x_inc) { + cublasHandle_t handle; + auto status = cublasDzasum(handle, static_cast<int>(n), + reinterpret_cast<const cuDoubleComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc), + reinterpret_cast<cuDoubleComplex*>(&asum_buffer[asum_offset])); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXasum<half>(const size_t n, + half* asum_buffer, const size_t asum_offset, + const half* x_buffer, const size_t x_offset, const size_t x_inc) { + return CUBLAS_STATUS_NOT_SUPPORTED; +} + +// Forwards the cuBLAS calls for iSAMAX/iDAMAX/iCAMAX/iZAMAX/iHAMAX +template <typename T> +cublasStatus_t cublasXamax(const size_t n, + T* imax_buffer, const size_t imax_offset, + const T* x_buffer, const size_t x_offset, const size_t x_inc); +template <> +cublasStatus_t cublasXamax<float>(const size_t n, + float* imax_buffer, const size_t imax_offset, + const float* x_buffer, const size_t x_offset, const size_t x_inc) { + cublasHandle_t handle; + auto status = cublasIsamax(handle, static_cast<int>(n), + &x_buffer[x_offset], static_cast<int>(x_inc), + &imax_buffer[imax_offset]); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXamax<double>(const size_t n, + double* imax_buffer, const size_t imax_offset, + const double* x_buffer, const size_t x_offset, const size_t x_inc) { + cublasHandle_t handle; + auto status = cublasIdamax(handle, static_cast<int>(n), + &x_buffer[x_offset], static_cast<int>(x_inc), + &imax_buffer[imax_offset]); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXamax<float2>(const size_t n, + float2* imax_buffer, const size_t imax_offset, + const float2* x_buffer, const size_t x_offset, const size_t 
x_inc) { + cublasHandle_t handle; + auto status = cublasIcamax(handle, static_cast<int>(n), + reinterpret_cast<const cuComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc), + reinterpret_cast<cuComplex*>(&imax_buffer[imax_offset])); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXamax<double2>(const size_t n, + double2* imax_buffer, const size_t imax_offset, + const double2* x_buffer, const size_t x_offset, const size_t x_inc) { + cublasHandle_t handle; + auto status = cublasIzamax(handle, static_cast<int>(n), + reinterpret_cast<const cuDoubleComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc), + reinterpret_cast<cuDoubleComplex*>(&imax_buffer[imax_offset])); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXamax<half>(const size_t n, + half* imax_buffer, const size_t imax_offset, + const half* x_buffer, const size_t x_offset, const size_t x_inc) { + return CUBLAS_STATUS_NOT_SUPPORTED; +} + +// ================================================================================================= +// BLAS level-2 (matrix-vector) routines +// ================================================================================================= + +// Forwards the cuBLAS calls for SGEMV/DGEMV/CGEMV/ZGEMV +cublasStatus_t cublasXgemv(const Layout layout, const cublasOperation_t a_transpose, + const size_t m, const size_t n, + const float alpha, + const float* a_buffer, const size_t a_offset, const size_t a_ld, + const float* x_buffer, const size_t x_offset, const size_t x_inc, + const float beta, + float* y_buffer, const size_t y_offset, const size_t y_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cublasHandle_t handle; + auto status = cublasSgemv(handle, a_transpose, + static_cast<int>(m), static_cast<int>(n), + &alpha, + &a_buffer[a_offset], a_ld, + &x_buffer[x_offset], static_cast<int>(x_inc), + &beta, + &y_buffer[y_offset], static_cast<int>(y_inc)); + 
cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXgemv(const Layout layout, const cublasOperation_t a_transpose, + const size_t m, const size_t n, + const double alpha, + const double* a_buffer, const size_t a_offset, const size_t a_ld, + const double* x_buffer, const size_t x_offset, const size_t x_inc, + const double beta, + double* y_buffer, const size_t y_offset, const size_t y_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cublasHandle_t handle; + auto status = cublasDgemv(handle, a_transpose, + static_cast<int>(m), static_cast<int>(n), + &alpha, + &a_buffer[a_offset], a_ld, + &x_buffer[x_offset], static_cast<int>(x_inc), + &beta, + &y_buffer[y_offset], static_cast<int>(y_inc)); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXgemv(const Layout layout, const cublasOperation_t a_transpose, + const size_t m, const size_t n, + const float2 alpha, + const float2* a_buffer, const size_t a_offset, const size_t a_ld, + const float2* x_buffer, const size_t x_offset, const size_t x_inc, + const float2 beta, + float2* y_buffer, const size_t y_offset, const size_t y_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); + cuComplex beta_cuda; + beta_cuda.x = beta.real(); + beta_cuda.y = beta.imag(); + cublasHandle_t handle; + auto status = cublasCgemv(handle, a_transpose, + static_cast<int>(m), static_cast<int>(n), + &alpha_cuda, + reinterpret_cast<const cuComplex*>(&a_buffer[a_offset]), a_ld, + reinterpret_cast<const cuComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc), + &beta_cuda, + reinterpret_cast<cuComplex*>(&y_buffer[y_offset]), static_cast<int>(y_inc)); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXgemv(const Layout layout, const cublasOperation_t a_transpose, + const size_t m, const size_t n, + const double2 alpha, + const double2* a_buffer, const 
size_t a_offset, const size_t a_ld, + const double2* x_buffer, const size_t x_offset, const size_t x_inc, + const double2 beta, + double2* y_buffer, const size_t y_offset, const size_t y_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuDoubleComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); + cuDoubleComplex beta_cuda; + beta_cuda.x = beta.real(); + beta_cuda.y = beta.imag(); + cublasHandle_t handle; + auto status = cublasZgemv(handle, a_transpose, + static_cast<int>(m), static_cast<int>(n), + &alpha_cuda, + reinterpret_cast<const cuDoubleComplex*>(&a_buffer[a_offset]), a_ld, + reinterpret_cast<const cuDoubleComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc), + &beta_cuda, + reinterpret_cast<cuDoubleComplex*>(&y_buffer[y_offset]), static_cast<int>(y_inc)); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXgemv(const Layout layout, const cublasOperation_t a_transpose, + const size_t m, const size_t n, + const half alpha, + const half* a_buffer, const size_t a_offset, const size_t a_ld, + const half* x_buffer, const size_t x_offset, const size_t x_inc, + const half beta, + half* y_buffer, const size_t y_offset, const size_t y_inc) { + return CUBLAS_STATUS_NOT_SUPPORTED; +} + +// Forwards the cuBLAS calls for SGBMV/DGBMV/CGBMV/ZGBMV +cublasStatus_t cublasXgbmv(const Layout layout, const cublasOperation_t a_transpose, + const size_t m, const size_t n, const size_t kl, const size_t ku, + const float alpha, + const float* a_buffer, const size_t a_offset, const size_t a_ld, + const float* x_buffer, const size_t x_offset, const size_t x_inc, + const float beta, + float* y_buffer, const size_t y_offset, const size_t y_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cublasHandle_t handle; + auto status = cublasSgbmv(handle, a_transpose, + static_cast<int>(m), static_cast<int>(n), static_cast<int>(kl), static_cast<int>(ku), + &alpha, + 
&a_buffer[a_offset], a_ld, + &x_buffer[x_offset], static_cast<int>(x_inc), + &beta, + &y_buffer[y_offset], static_cast<int>(y_inc)); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXgbmv(const Layout layout, const cublasOperation_t a_transpose, + const size_t m, const size_t n, const size_t kl, const size_t ku, + const double alpha, + const double* a_buffer, const size_t a_offset, const size_t a_ld, + const double* x_buffer, const size_t x_offset, const size_t x_inc, + const double beta, + double* y_buffer, const size_t y_offset, const size_t y_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cublasHandle_t handle; + auto status = cublasDgbmv(handle, a_transpose, + static_cast<int>(m), static_cast<int>(n), static_cast<int>(kl), static_cast<int>(ku), + &alpha, + &a_buffer[a_offset], a_ld, + &x_buffer[x_offset], static_cast<int>(x_inc), + &beta, + &y_buffer[y_offset], static_cast<int>(y_inc)); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXgbmv(const Layout layout, const cublasOperation_t a_transpose, + const size_t m, const size_t n, const size_t kl, const size_t ku, + const float2 alpha, + const float2* a_buffer, const size_t a_offset, const size_t a_ld, + const float2* x_buffer, const size_t x_offset, const size_t x_inc, + const float2 beta, + float2* y_buffer, const size_t y_offset, const size_t y_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); + cuComplex beta_cuda; + beta_cuda.x = beta.real(); + beta_cuda.y = beta.imag(); + cublasHandle_t handle; + auto status = cublasCgbmv(handle, a_transpose, + static_cast<int>(m), static_cast<int>(n), static_cast<int>(kl), static_cast<int>(ku), + &alpha_cuda, + reinterpret_cast<const cuComplex*>(&a_buffer[a_offset]), a_ld, + reinterpret_cast<const cuComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc), + &beta_cuda, + 
reinterpret_cast<cuComplex*>(&y_buffer[y_offset]), static_cast<int>(y_inc)); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXgbmv(const Layout layout, const cublasOperation_t a_transpose, + const size_t m, const size_t n, const size_t kl, const size_t ku, + const double2 alpha, + const double2* a_buffer, const size_t a_offset, const size_t a_ld, + const double2* x_buffer, const size_t x_offset, const size_t x_inc, + const double2 beta, + double2* y_buffer, const size_t y_offset, const size_t y_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuDoubleComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); + cuDoubleComplex beta_cuda; + beta_cuda.x = beta.real(); + beta_cuda.y = beta.imag(); + cublasHandle_t handle; + auto status = cublasZgbmv(handle, a_transpose, + static_cast<int>(m), static_cast<int>(n), static_cast<int>(kl), static_cast<int>(ku), + &alpha_cuda, + reinterpret_cast<const cuDoubleComplex*>(&a_buffer[a_offset]), a_ld, + reinterpret_cast<const cuDoubleComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc), + &beta_cuda, + reinterpret_cast<cuDoubleComplex*>(&y_buffer[y_offset]), static_cast<int>(y_inc)); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXgbmv(const Layout layout, const cublasOperation_t a_transpose, + const size_t m, const size_t n, const size_t kl, const size_t ku, + const half alpha, + const half* a_buffer, const size_t a_offset, const size_t a_ld, + const half* x_buffer, const size_t x_offset, const size_t x_inc, + const half beta, + half* y_buffer, const size_t y_offset, const size_t y_inc) { + return CUBLAS_STATUS_NOT_SUPPORTED; +} + +// Forwards the cuBLAS calls for CHEMV/ZHEMV +cublasStatus_t cublasXhemv(const Layout layout, const cublasFillMode_t triangle, + const size_t n, + const float2 alpha, + const float2* a_buffer, const size_t a_offset, const size_t a_ld, + const float2* x_buffer, const size_t x_offset, const size_t x_inc, 
+ const float2 beta, + float2* y_buffer, const size_t y_offset, const size_t y_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); + cuComplex beta_cuda; + beta_cuda.x = beta.real(); + beta_cuda.y = beta.imag(); + cublasHandle_t handle; + auto status = cublasChemv(handle, triangle, + static_cast<int>(n), + &alpha_cuda, + reinterpret_cast<const cuComplex*>(&a_buffer[a_offset]), a_ld, + reinterpret_cast<const cuComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc), + &beta_cuda, + reinterpret_cast<cuComplex*>(&y_buffer[y_offset]), static_cast<int>(y_inc)); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXhemv(const Layout layout, const cublasFillMode_t triangle, + const size_t n, + const double2 alpha, + const double2* a_buffer, const size_t a_offset, const size_t a_ld, + const double2* x_buffer, const size_t x_offset, const size_t x_inc, + const double2 beta, + double2* y_buffer, const size_t y_offset, const size_t y_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuDoubleComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); + cuDoubleComplex beta_cuda; + beta_cuda.x = beta.real(); + beta_cuda.y = beta.imag(); + cublasHandle_t handle; + auto status = cublasZhemv(handle, triangle, + static_cast<int>(n), + &alpha_cuda, + reinterpret_cast<const cuDoubleComplex*>(&a_buffer[a_offset]), a_ld, + reinterpret_cast<const cuDoubleComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc), + &beta_cuda, + reinterpret_cast<cuDoubleComplex*>(&y_buffer[y_offset]), static_cast<int>(y_inc)); + cublasDestroy(handle); + return status; +} + +// Forwards the cuBLAS calls for CHBMV/ZHBMV +cublasStatus_t cublasXhbmv(const Layout layout, const cublasFillMode_t triangle, + const size_t n, const size_t k, + const float2 alpha, + const float2* a_buffer, const size_t a_offset, const size_t a_ld, + 
const float2* x_buffer, const size_t x_offset, const size_t x_inc, + const float2 beta, + float2* y_buffer, const size_t y_offset, const size_t y_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); + cuComplex beta_cuda; + beta_cuda.x = beta.real(); + beta_cuda.y = beta.imag(); + cublasHandle_t handle; + auto status = cublasChbmv(handle, triangle, + static_cast<int>(n), static_cast<int>(k), + &alpha_cuda, + reinterpret_cast<const cuComplex*>(&a_buffer[a_offset]), a_ld, + reinterpret_cast<const cuComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc), + &beta_cuda, + reinterpret_cast<cuComplex*>(&y_buffer[y_offset]), static_cast<int>(y_inc)); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXhbmv(const Layout layout, const cublasFillMode_t triangle, + const size_t n, const size_t k, + const double2 alpha, + const double2* a_buffer, const size_t a_offset, const size_t a_ld, + const double2* x_buffer, const size_t x_offset, const size_t x_inc, + const double2 beta, + double2* y_buffer, const size_t y_offset, const size_t y_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuDoubleComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); + cuDoubleComplex beta_cuda; + beta_cuda.x = beta.real(); + beta_cuda.y = beta.imag(); + cublasHandle_t handle; + auto status = cublasZhbmv(handle, triangle, + static_cast<int>(n), static_cast<int>(k), + &alpha_cuda, + reinterpret_cast<const cuDoubleComplex*>(&a_buffer[a_offset]), a_ld, + reinterpret_cast<const cuDoubleComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc), + &beta_cuda, + reinterpret_cast<cuDoubleComplex*>(&y_buffer[y_offset]), static_cast<int>(y_inc)); + cublasDestroy(handle); + return status; +} + +// Forwards the cuBLAS calls for CHPMV/ZHPMV +cublasStatus_t cublasXhpmv(const Layout layout, const cublasFillMode_t triangle, + 
const size_t n, + const float2 alpha, + const float2* ap_buffer, const size_t ap_offset, + const float2* x_buffer, const size_t x_offset, const size_t x_inc, + const float2 beta, + float2* y_buffer, const size_t y_offset, const size_t y_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); + cuComplex beta_cuda; + beta_cuda.x = beta.real(); + beta_cuda.y = beta.imag(); + cublasHandle_t handle; + auto status = cublasChpmv(handle, triangle, + static_cast<int>(n), + &alpha_cuda, + reinterpret_cast<const cuComplex*>(&ap_buffer[ap_offset]), + reinterpret_cast<const cuComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc), + &beta_cuda, + reinterpret_cast<cuComplex*>(&y_buffer[y_offset]), static_cast<int>(y_inc)); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXhpmv(const Layout layout, const cublasFillMode_t triangle, + const size_t n, + const double2 alpha, + const double2* ap_buffer, const size_t ap_offset, + const double2* x_buffer, const size_t x_offset, const size_t x_inc, + const double2 beta, + double2* y_buffer, const size_t y_offset, const size_t y_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuDoubleComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); + cuDoubleComplex beta_cuda; + beta_cuda.x = beta.real(); + beta_cuda.y = beta.imag(); + cublasHandle_t handle; + auto status = cublasZhpmv(handle, triangle, + static_cast<int>(n), + &alpha_cuda, + reinterpret_cast<const cuDoubleComplex*>(&ap_buffer[ap_offset]), + reinterpret_cast<const cuDoubleComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc), + &beta_cuda, + reinterpret_cast<cuDoubleComplex*>(&y_buffer[y_offset]), static_cast<int>(y_inc)); + cublasDestroy(handle); + return status; +} + +// Forwards the cuBLAS calls for SSYMV/DSYMV +cublasStatus_t cublasXsymv(const Layout layout, const cublasFillMode_t 
triangle, + const size_t n, + const float alpha, + const float* a_buffer, const size_t a_offset, const size_t a_ld, + const float* x_buffer, const size_t x_offset, const size_t x_inc, + const float beta, + float* y_buffer, const size_t y_offset, const size_t y_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cublasHandle_t handle; + auto status = cublasSsymv(handle, triangle, + static_cast<int>(n), + &alpha, + &a_buffer[a_offset], a_ld, + &x_buffer[x_offset], static_cast<int>(x_inc), + &beta, + &y_buffer[y_offset], static_cast<int>(y_inc)); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXsymv(const Layout layout, const cublasFillMode_t triangle, + const size_t n, + const double alpha, + const double* a_buffer, const size_t a_offset, const size_t a_ld, + const double* x_buffer, const size_t x_offset, const size_t x_inc, + const double beta, + double* y_buffer, const size_t y_offset, const size_t y_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cublasHandle_t handle; + auto status = cublasDsymv(handle, triangle, + static_cast<int>(n), + &alpha, + &a_buffer[a_offset], a_ld, + &x_buffer[x_offset], static_cast<int>(x_inc), + &beta, + &y_buffer[y_offset], static_cast<int>(y_inc)); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXsymv(const Layout layout, const cublasFillMode_t triangle, + const size_t n, + const half alpha, + const half* a_buffer, const size_t a_offset, const size_t a_ld, + const half* x_buffer, const size_t x_offset, const size_t x_inc, + const half beta, + half* y_buffer, const size_t y_offset, const size_t y_inc) { + return CUBLAS_STATUS_NOT_SUPPORTED; +} + +// Forwards the cuBLAS calls for SSBMV/DSBMV +cublasStatus_t cublasXsbmv(const Layout layout, const cublasFillMode_t triangle, + const size_t n, const size_t k, + const float alpha, + const float* a_buffer, const size_t a_offset, const size_t a_ld, + const float* x_buffer, const size_t 
x_offset, const size_t x_inc, + const float beta, + float* y_buffer, const size_t y_offset, const size_t y_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cublasHandle_t handle; + auto status = cublasSsbmv(handle, triangle, + static_cast<int>(n), static_cast<int>(k), + &alpha, + &a_buffer[a_offset], a_ld, + &x_buffer[x_offset], static_cast<int>(x_inc), + &beta, + &y_buffer[y_offset], static_cast<int>(y_inc)); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXsbmv(const Layout layout, const cublasFillMode_t triangle, + const size_t n, const size_t k, + const double alpha, + const double* a_buffer, const size_t a_offset, const size_t a_ld, + const double* x_buffer, const size_t x_offset, const size_t x_inc, + const double beta, + double* y_buffer, const size_t y_offset, const size_t y_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cublasHandle_t handle; + auto status = cublasDsbmv(handle, triangle, + static_cast<int>(n), static_cast<int>(k), + &alpha, + &a_buffer[a_offset], a_ld, + &x_buffer[x_offset], static_cast<int>(x_inc), + &beta, + &y_buffer[y_offset], static_cast<int>(y_inc)); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXsbmv(const Layout layout, const cublasFillMode_t triangle, + const size_t n, const size_t k, + const half alpha, + const half* a_buffer, const size_t a_offset, const size_t a_ld, + const half* x_buffer, const size_t x_offset, const size_t x_inc, + const half beta, + half* y_buffer, const size_t y_offset, const size_t y_inc) { + return CUBLAS_STATUS_NOT_SUPPORTED; +} + +// Forwards the cuBLAS calls for SSPMV/DSPMV +cublasStatus_t cublasXspmv(const Layout layout, const cublasFillMode_t triangle, + const size_t n, + const float alpha, + const float* ap_buffer, const size_t ap_offset, + const float* x_buffer, const size_t x_offset, const size_t x_inc, + const float beta, + float* y_buffer, const size_t y_offset, const size_t y_inc) { + 
if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cublasHandle_t handle; + auto status = cublasSspmv(handle, triangle, + static_cast<int>(n), + &alpha, + &ap_buffer[ap_offset], + &x_buffer[x_offset], static_cast<int>(x_inc), + &beta, + &y_buffer[y_offset], static_cast<int>(y_inc)); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXspmv(const Layout layout, const cublasFillMode_t triangle, + const size_t n, + const double alpha, + const double* ap_buffer, const size_t ap_offset, + const double* x_buffer, const size_t x_offset, const size_t x_inc, + const double beta, + double* y_buffer, const size_t y_offset, const size_t y_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cublasHandle_t handle; + auto status = cublasDspmv(handle, triangle, + static_cast<int>(n), + &alpha, + &ap_buffer[ap_offset], + &x_buffer[x_offset], static_cast<int>(x_inc), + &beta, + &y_buffer[y_offset], static_cast<int>(y_inc)); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXspmv(const Layout layout, const cublasFillMode_t triangle, + const size_t n, + const half alpha, + const half* ap_buffer, const size_t ap_offset, + const half* x_buffer, const size_t x_offset, const size_t x_inc, + const half beta, + half* y_buffer, const size_t y_offset, const size_t y_inc) { + return CUBLAS_STATUS_NOT_SUPPORTED; +} + +// Forwards the cuBLAS calls for STRMV/DTRMV/CTRMV/ZTRMV +template <typename T> +cublasStatus_t cublasXtrmv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, + const T* a_buffer, const size_t a_offset, const size_t a_ld, + T* x_buffer, const size_t x_offset, const size_t x_inc); +template <> +cublasStatus_t cublasXtrmv<float>(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, + const float* a_buffer, const size_t 
a_offset, const size_t a_ld, + float* x_buffer, const size_t x_offset, const size_t x_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cublasHandle_t handle; + auto status = cublasStrmv(handle, triangle, a_transpose, diagonal, + static_cast<int>(n), + &a_buffer[a_offset], a_ld, + &x_buffer[x_offset], static_cast<int>(x_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXtrmv<double>(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, + const double* a_buffer, const size_t a_offset, const size_t a_ld, + double* x_buffer, const size_t x_offset, const size_t x_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cublasHandle_t handle; + auto status = cublasDtrmv(handle, triangle, a_transpose, diagonal, + static_cast<int>(n), + &a_buffer[a_offset], a_ld, + &x_buffer[x_offset], static_cast<int>(x_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXtrmv<float2>(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, + const float2* a_buffer, const size_t a_offset, const size_t a_ld, + float2* x_buffer, const size_t x_offset, const size_t x_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cublasHandle_t handle; + auto status = cublasCtrmv(handle, triangle, a_transpose, diagonal, + static_cast<int>(n), + reinterpret_cast<const cuComplex*>(&a_buffer[a_offset]), a_ld, + reinterpret_cast<cuComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXtrmv<double2>(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, + const double2* a_buffer, const size_t a_offset, const 
size_t a_ld, + double2* x_buffer, const size_t x_offset, const size_t x_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cublasHandle_t handle; + auto status = cublasZtrmv(handle, triangle, a_transpose, diagonal, + static_cast<int>(n), + reinterpret_cast<const cuDoubleComplex*>(&a_buffer[a_offset]), a_ld, + reinterpret_cast<cuDoubleComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXtrmv<half>(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, + const half* a_buffer, const size_t a_offset, const size_t a_ld, + half* x_buffer, const size_t x_offset, const size_t x_inc) { + return CUBLAS_STATUS_NOT_SUPPORTED; +} + +// Forwards the cuBLAS calls for STBMV/DTBMV/CTBMV/ZTBMV +template <typename T> +cublasStatus_t cublasXtbmv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, const size_t k, + const T* a_buffer, const size_t a_offset, const size_t a_ld, + T* x_buffer, const size_t x_offset, const size_t x_inc); +template <> +cublasStatus_t cublasXtbmv<float>(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, const size_t k, + const float* a_buffer, const size_t a_offset, const size_t a_ld, + float* x_buffer, const size_t x_offset, const size_t x_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cublasHandle_t handle; + auto status = cublasStbmv(handle, triangle, a_transpose, diagonal, + static_cast<int>(n), static_cast<int>(k), + &a_buffer[a_offset], a_ld, + &x_buffer[x_offset], static_cast<int>(x_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXtbmv<double>(const Layout layout, const cublasFillMode_t triangle, 
const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, const size_t k, + const double* a_buffer, const size_t a_offset, const size_t a_ld, + double* x_buffer, const size_t x_offset, const size_t x_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cublasHandle_t handle; + auto status = cublasDtbmv(handle, triangle, a_transpose, diagonal, + static_cast<int>(n), static_cast<int>(k), + &a_buffer[a_offset], a_ld, + &x_buffer[x_offset], static_cast<int>(x_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXtbmv<float2>(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, const size_t k, + const float2* a_buffer, const size_t a_offset, const size_t a_ld, + float2* x_buffer, const size_t x_offset, const size_t x_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cublasHandle_t handle; + auto status = cublasCtbmv(handle, triangle, a_transpose, diagonal, + static_cast<int>(n), static_cast<int>(k), + reinterpret_cast<const cuComplex*>(&a_buffer[a_offset]), a_ld, + reinterpret_cast<cuComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXtbmv<double2>(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, const size_t k, + const double2* a_buffer, const size_t a_offset, const size_t a_ld, + double2* x_buffer, const size_t x_offset, const size_t x_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cublasHandle_t handle; + auto status = cublasZtbmv(handle, triangle, a_transpose, diagonal, + static_cast<int>(n), static_cast<int>(k), + reinterpret_cast<const cuDoubleComplex*>(&a_buffer[a_offset]), a_ld, + 
reinterpret_cast<cuDoubleComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXtbmv<half>(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, const size_t k, + const half* a_buffer, const size_t a_offset, const size_t a_ld, + half* x_buffer, const size_t x_offset, const size_t x_inc) { + return CUBLAS_STATUS_NOT_SUPPORTED; +} + +// Forwards the cuBLAS calls for STPMV/DTPMV/CTPMV/ZTPMV +template <typename T> +cublasStatus_t cublasXtpmv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, + const T* ap_buffer, const size_t ap_offset, + T* x_buffer, const size_t x_offset, const size_t x_inc); +template <> +cublasStatus_t cublasXtpmv<float>(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, + const float* ap_buffer, const size_t ap_offset, + float* x_buffer, const size_t x_offset, const size_t x_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cublasHandle_t handle; + auto status = cublasStpmv(handle, triangle, a_transpose, diagonal, + static_cast<int>(n), + &ap_buffer[ap_offset], + &x_buffer[x_offset], static_cast<int>(x_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXtpmv<double>(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, + const double* ap_buffer, const size_t ap_offset, + double* x_buffer, const size_t x_offset, const size_t x_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cublasHandle_t handle; + auto status = cublasDtpmv(handle, triangle, a_transpose, diagonal, + static_cast<int>(n), + &ap_buffer[ap_offset], 
+ &x_buffer[x_offset], static_cast<int>(x_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXtpmv<float2>(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, + const float2* ap_buffer, const size_t ap_offset, + float2* x_buffer, const size_t x_offset, const size_t x_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cublasHandle_t handle; + auto status = cublasCtpmv(handle, triangle, a_transpose, diagonal, + static_cast<int>(n), + reinterpret_cast<const cuComplex*>(&ap_buffer[ap_offset]), + reinterpret_cast<cuComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXtpmv<double2>(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, + const double2* ap_buffer, const size_t ap_offset, + double2* x_buffer, const size_t x_offset, const size_t x_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cublasHandle_t handle; + auto status = cublasZtpmv(handle, triangle, a_transpose, diagonal, + static_cast<int>(n), + reinterpret_cast<const cuDoubleComplex*>(&ap_buffer[ap_offset]), + reinterpret_cast<cuDoubleComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXtpmv<half>(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, + const half* ap_buffer, const size_t ap_offset, + half* x_buffer, const size_t x_offset, const size_t x_inc) { + return CUBLAS_STATUS_NOT_SUPPORTED; +} + +// Forwards the cuBLAS calls for STRSV/DTRSV/CTRSV/ZTRSV +template <typename T> +cublasStatus_t cublasXtrsv(const Layout layout, const cublasFillMode_t triangle, const 
cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, + const T* a_buffer, const size_t a_offset, const size_t a_ld, + T* x_buffer, const size_t x_offset, const size_t x_inc); +template <> +cublasStatus_t cublasXtrsv<float>(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, + const float* a_buffer, const size_t a_offset, const size_t a_ld, + float* x_buffer, const size_t x_offset, const size_t x_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cublasHandle_t handle; + auto status = cublasStrsv(handle, triangle, a_transpose, diagonal, + static_cast<int>(n), + &a_buffer[a_offset], a_ld, + &x_buffer[x_offset], static_cast<int>(x_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXtrsv<double>(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, + const double* a_buffer, const size_t a_offset, const size_t a_ld, + double* x_buffer, const size_t x_offset, const size_t x_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cublasHandle_t handle; + auto status = cublasDtrsv(handle, triangle, a_transpose, diagonal, + static_cast<int>(n), + &a_buffer[a_offset], a_ld, + &x_buffer[x_offset], static_cast<int>(x_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXtrsv<float2>(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, + const float2* a_buffer, const size_t a_offset, const size_t a_ld, + float2* x_buffer, const size_t x_offset, const size_t x_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cublasHandle_t handle; + auto status = cublasCtrsv(handle, triangle, a_transpose, diagonal, + static_cast<int>(n), + 
reinterpret_cast<const cuComplex*>(&a_buffer[a_offset]), a_ld, + reinterpret_cast<cuComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXtrsv<double2>(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, + const double2* a_buffer, const size_t a_offset, const size_t a_ld, + double2* x_buffer, const size_t x_offset, const size_t x_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cublasHandle_t handle; + auto status = cublasZtrsv(handle, triangle, a_transpose, diagonal, + static_cast<int>(n), + reinterpret_cast<const cuDoubleComplex*>(&a_buffer[a_offset]), a_ld, + reinterpret_cast<cuDoubleComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc)); + cublasDestroy(handle); + return status; +} + +// Forwards the cuBLAS calls for STBSV/DTBSV/CTBSV/ZTBSV +template <typename T> +cublasStatus_t cublasXtbsv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, const size_t k, + const T* a_buffer, const size_t a_offset, const size_t a_ld, + T* x_buffer, const size_t x_offset, const size_t x_inc); +template <> +cublasStatus_t cublasXtbsv<float>(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, const size_t k, + const float* a_buffer, const size_t a_offset, const size_t a_ld, + float* x_buffer, const size_t x_offset, const size_t x_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cublasHandle_t handle; + auto status = cublasStbsv(handle, triangle, a_transpose, diagonal, + static_cast<int>(n), static_cast<int>(k), + &a_buffer[a_offset], a_ld, + &x_buffer[x_offset], static_cast<int>(x_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t 
cublasXtbsv<double>(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, const size_t k, + const double* a_buffer, const size_t a_offset, const size_t a_ld, + double* x_buffer, const size_t x_offset, const size_t x_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cublasHandle_t handle; + auto status = cublasDtbsv(handle, triangle, a_transpose, diagonal, + static_cast<int>(n), static_cast<int>(k), + &a_buffer[a_offset], a_ld, + &x_buffer[x_offset], static_cast<int>(x_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXtbsv<float2>(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, const size_t k, + const float2* a_buffer, const size_t a_offset, const size_t a_ld, + float2* x_buffer, const size_t x_offset, const size_t x_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cublasHandle_t handle; + auto status = cublasCtbsv(handle, triangle, a_transpose, diagonal, + static_cast<int>(n), static_cast<int>(k), + reinterpret_cast<const cuComplex*>(&a_buffer[a_offset]), a_ld, + reinterpret_cast<cuComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXtbsv<double2>(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, const size_t k, + const double2* a_buffer, const size_t a_offset, const size_t a_ld, + double2* x_buffer, const size_t x_offset, const size_t x_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cublasHandle_t handle; + auto status = cublasZtbsv(handle, triangle, a_transpose, diagonal, + static_cast<int>(n), static_cast<int>(k), + reinterpret_cast<const 
cuDoubleComplex*>(&a_buffer[a_offset]), a_ld, + reinterpret_cast<cuDoubleComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc)); + cublasDestroy(handle); + return status; +} + +// Forwards the cuBLAS calls for STPSV/DTPSV/CTPSV/ZTPSV +template <typename T> +cublasStatus_t cublasXtpsv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, + const T* ap_buffer, const size_t ap_offset, + T* x_buffer, const size_t x_offset, const size_t x_inc); +template <> +cublasStatus_t cublasXtpsv<float>(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, + const float* ap_buffer, const size_t ap_offset, + float* x_buffer, const size_t x_offset, const size_t x_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cublasHandle_t handle; + auto status = cublasStpsv(handle, triangle, a_transpose, diagonal, + static_cast<int>(n), + &ap_buffer[ap_offset], + &x_buffer[x_offset], static_cast<int>(x_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXtpsv<double>(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, + const double* ap_buffer, const size_t ap_offset, + double* x_buffer, const size_t x_offset, const size_t x_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cublasHandle_t handle; + auto status = cublasDtpsv(handle, triangle, a_transpose, diagonal, + static_cast<int>(n), + &ap_buffer[ap_offset], + &x_buffer[x_offset], static_cast<int>(x_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXtpsv<float2>(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, + const float2* ap_buffer, const size_t 
ap_offset, + float2* x_buffer, const size_t x_offset, const size_t x_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cublasHandle_t handle; + auto status = cublasCtpsv(handle, triangle, a_transpose, diagonal, + static_cast<int>(n), + reinterpret_cast<const cuComplex*>(&ap_buffer[ap_offset]), + reinterpret_cast<cuComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXtpsv<double2>(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, + const double2* ap_buffer, const size_t ap_offset, + double2* x_buffer, const size_t x_offset, const size_t x_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cublasHandle_t handle; + auto status = cublasZtpsv(handle, triangle, a_transpose, diagonal, + static_cast<int>(n), + reinterpret_cast<const cuDoubleComplex*>(&ap_buffer[ap_offset]), + reinterpret_cast<cuDoubleComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc)); + cublasDestroy(handle); + return status; +} + +// Forwards the cuBLAS calls for SGER/DGER +cublasStatus_t cublasXger(const Layout layout, + const size_t m, const size_t n, + const float alpha, + const float* x_buffer, const size_t x_offset, const size_t x_inc, + const float* y_buffer, const size_t y_offset, const size_t y_inc, + float* a_buffer, const size_t a_offset, const size_t a_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cublasHandle_t handle; + auto status = cublasSger(handle, static_cast<int>(m), static_cast<int>(n), + &alpha, + &x_buffer[x_offset], static_cast<int>(x_inc), + &y_buffer[y_offset], static_cast<int>(y_inc), + &a_buffer[a_offset], a_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXger(const Layout layout, + const size_t m, const size_t n, + const double alpha, + const double* x_buffer, const 
size_t x_offset, const size_t x_inc, + const double* y_buffer, const size_t y_offset, const size_t y_inc, + double* a_buffer, const size_t a_offset, const size_t a_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cublasHandle_t handle; + auto status = cublasDger(handle, static_cast<int>(m), static_cast<int>(n), + &alpha, + &x_buffer[x_offset], static_cast<int>(x_inc), + &y_buffer[y_offset], static_cast<int>(y_inc), + &a_buffer[a_offset], a_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXger(const Layout layout, + const size_t m, const size_t n, + const half alpha, + const half* x_buffer, const size_t x_offset, const size_t x_inc, + const half* y_buffer, const size_t y_offset, const size_t y_inc, + half* a_buffer, const size_t a_offset, const size_t a_ld) { + return CUBLAS_STATUS_NOT_SUPPORTED; +} + +// Forwards the cuBLAS calls for CGERU/ZGERU +cublasStatus_t cublasXgeru(const Layout layout, + const size_t m, const size_t n, + const float2 alpha, + const float2* x_buffer, const size_t x_offset, const size_t x_inc, + const float2* y_buffer, const size_t y_offset, const size_t y_inc, + float2* a_buffer, const size_t a_offset, const size_t a_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); + cublasHandle_t handle; + auto status = cublasCgeru(handle, static_cast<int>(m), static_cast<int>(n), + &alpha_cuda, + reinterpret_cast<const cuComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc), + reinterpret_cast<const cuComplex*>(&y_buffer[y_offset]), static_cast<int>(y_inc), + reinterpret_cast<cuComplex*>(&a_buffer[a_offset]), a_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXgeru(const Layout layout, + const size_t m, const size_t n, + const double2 alpha, + const double2* x_buffer, const size_t x_offset, const size_t x_inc, + const double2* y_buffer, const size_t y_offset, 
const size_t y_inc, + double2* a_buffer, const size_t a_offset, const size_t a_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuDoubleComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); + cublasHandle_t handle; + auto status = cublasZgeru(handle, static_cast<int>(m), static_cast<int>(n), + &alpha_cuda, + reinterpret_cast<const cuDoubleComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc), + reinterpret_cast<const cuDoubleComplex*>(&y_buffer[y_offset]), static_cast<int>(y_inc), + reinterpret_cast<cuDoubleComplex*>(&a_buffer[a_offset]), a_ld); + cublasDestroy(handle); + return status; +} + +// Forwards the cuBLAS calls for CGERC/ZGERC +cublasStatus_t cublasXgerc(const Layout layout, + const size_t m, const size_t n, + const float2 alpha, + const float2* x_buffer, const size_t x_offset, const size_t x_inc, + const float2* y_buffer, const size_t y_offset, const size_t y_inc, + float2* a_buffer, const size_t a_offset, const size_t a_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); + cublasHandle_t handle; + auto status = cublasCgerc(handle, static_cast<int>(m), static_cast<int>(n), + &alpha_cuda, + reinterpret_cast<const cuComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc), + reinterpret_cast<const cuComplex*>(&y_buffer[y_offset]), static_cast<int>(y_inc), + reinterpret_cast<cuComplex*>(&a_buffer[a_offset]), a_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXgerc(const Layout layout, + const size_t m, const size_t n, + const double2 alpha, + const double2* x_buffer, const size_t x_offset, const size_t x_inc, + const double2* y_buffer, const size_t y_offset, const size_t y_inc, + double2* a_buffer, const size_t a_offset, const size_t a_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuDoubleComplex alpha_cuda; + alpha_cuda.x 
= alpha.real(); + alpha_cuda.y = alpha.imag(); + cublasHandle_t handle; + auto status = cublasZgerc(handle, static_cast<int>(m), static_cast<int>(n), + &alpha_cuda, + reinterpret_cast<const cuDoubleComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc), + reinterpret_cast<const cuDoubleComplex*>(&y_buffer[y_offset]), static_cast<int>(y_inc), + reinterpret_cast<cuDoubleComplex*>(&a_buffer[a_offset]), a_ld); + cublasDestroy(handle); + return status; +} + +// Forwards the cuBLAS calls for CHER/ZHER +cublasStatus_t cublasXher(const Layout layout, const cublasFillMode_t triangle, + const size_t n, + const float alpha, + const float2* x_buffer, const size_t x_offset, const size_t x_inc, + float2* a_buffer, const size_t a_offset, const size_t a_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cublasHandle_t handle; + auto status = cublasCher(handle, triangle, + static_cast<int>(n), + &alpha, + reinterpret_cast<const cuComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc), + reinterpret_cast<cuComplex*>(&a_buffer[a_offset]), a_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXher(const Layout layout, const cublasFillMode_t triangle, + const size_t n, + const double alpha, + const double2* x_buffer, const size_t x_offset, const size_t x_inc, + double2* a_buffer, const size_t a_offset, const size_t a_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cublasHandle_t handle; + auto status = cublasZher(handle, triangle, + static_cast<int>(n), + &alpha, + reinterpret_cast<const cuDoubleComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc), + reinterpret_cast<cuDoubleComplex*>(&a_buffer[a_offset]), a_ld); + cublasDestroy(handle); + return status; +} + +// Forwards the cuBLAS calls for CHPR/ZHPR +cublasStatus_t cublasXhpr(const Layout layout, const cublasFillMode_t triangle, + const size_t n, + const float alpha, + const float2* x_buffer, const size_t x_offset, const size_t x_inc, + 
float2* ap_buffer, const size_t ap_offset) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cublasHandle_t handle; + auto status = cublasChpr(handle, triangle, + static_cast<int>(n), + &alpha, + reinterpret_cast<const cuComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc), + reinterpret_cast<cuComplex*>(&ap_buffer[ap_offset])); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXhpr(const Layout layout, const cublasFillMode_t triangle, + const size_t n, + const double alpha, + const double2* x_buffer, const size_t x_offset, const size_t x_inc, + double2* ap_buffer, const size_t ap_offset) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cublasHandle_t handle; + auto status = cublasZhpr(handle, triangle, + static_cast<int>(n), + &alpha, + reinterpret_cast<const cuDoubleComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc), + reinterpret_cast<cuDoubleComplex*>(&ap_buffer[ap_offset])); + cublasDestroy(handle); + return status; +} + +// Forwards the cuBLAS calls for CHER2/ZHER2 +cublasStatus_t cublasXher2(const Layout layout, const cublasFillMode_t triangle, + const size_t n, + const float2 alpha, + const float2* x_buffer, const size_t x_offset, const size_t x_inc, + const float2* y_buffer, const size_t y_offset, const size_t y_inc, + float2* a_buffer, const size_t a_offset, const size_t a_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); + cublasHandle_t handle; + auto status = cublasCher2(handle, triangle, + static_cast<int>(n), + &alpha_cuda, + reinterpret_cast<const cuComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc), + reinterpret_cast<const cuComplex*>(&y_buffer[y_offset]), static_cast<int>(y_inc), + reinterpret_cast<cuComplex*>(&a_buffer[a_offset]), a_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXher2(const Layout layout, const 
cublasFillMode_t triangle, + const size_t n, + const double2 alpha, + const double2* x_buffer, const size_t x_offset, const size_t x_inc, + const double2* y_buffer, const size_t y_offset, const size_t y_inc, + double2* a_buffer, const size_t a_offset, const size_t a_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuDoubleComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); + cublasHandle_t handle; + auto status = cublasZher2(handle, triangle, + static_cast<int>(n), + &alpha_cuda, + reinterpret_cast<const cuDoubleComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc), + reinterpret_cast<const cuDoubleComplex*>(&y_buffer[y_offset]), static_cast<int>(y_inc), + reinterpret_cast<cuDoubleComplex*>(&a_buffer[a_offset]), a_ld); + cublasDestroy(handle); + return status; +} + +// Forwards the cuBLAS calls for CHPR2/ZHPR2 +cublasStatus_t cublasXhpr2(const Layout layout, const cublasFillMode_t triangle, + const size_t n, + const float2 alpha, + const float2* x_buffer, const size_t x_offset, const size_t x_inc, + const float2* y_buffer, const size_t y_offset, const size_t y_inc, + float2* ap_buffer, const size_t ap_offset) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); + cublasHandle_t handle; + auto status = cublasChpr2(handle, triangle, + static_cast<int>(n), + &alpha_cuda, + reinterpret_cast<const cuComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc), + reinterpret_cast<const cuComplex*>(&y_buffer[y_offset]), static_cast<int>(y_inc), + reinterpret_cast<cuComplex*>(&ap_buffer[ap_offset])); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXhpr2(const Layout layout, const cublasFillMode_t triangle, + const size_t n, + const double2 alpha, + const double2* x_buffer, const size_t x_offset, const size_t x_inc, + const double2* y_buffer, const size_t y_offset, const size_t y_inc, + 
double2* ap_buffer, const size_t ap_offset) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuDoubleComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); + cublasHandle_t handle; + auto status = cublasZhpr2(handle, triangle, + static_cast<int>(n), + &alpha_cuda, + reinterpret_cast<const cuDoubleComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc), + reinterpret_cast<const cuDoubleComplex*>(&y_buffer[y_offset]), static_cast<int>(y_inc), + reinterpret_cast<cuDoubleComplex*>(&ap_buffer[ap_offset])); + cublasDestroy(handle); + return status; +} + +// Forwards the cuBLAS calls for SSYR/DSYR +cublasStatus_t cublasXsyr(const Layout layout, const cublasFillMode_t triangle, + const size_t n, + const float alpha, + const float* x_buffer, const size_t x_offset, const size_t x_inc, + float* a_buffer, const size_t a_offset, const size_t a_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cublasHandle_t handle; + auto status = cublasSsyr(handle, triangle, + static_cast<int>(n), + &alpha, + &x_buffer[x_offset], static_cast<int>(x_inc), + &a_buffer[a_offset], a_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXsyr(const Layout layout, const cublasFillMode_t triangle, + const size_t n, + const double alpha, + const double* x_buffer, const size_t x_offset, const size_t x_inc, + double* a_buffer, const size_t a_offset, const size_t a_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cublasHandle_t handle; + auto status = cublasDsyr(handle, triangle, + static_cast<int>(n), + &alpha, + &x_buffer[x_offset], static_cast<int>(x_inc), + &a_buffer[a_offset], a_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXsyr(const Layout layout, const cublasFillMode_t triangle, + const size_t n, + const half alpha, + const half* x_buffer, const size_t x_offset, const size_t x_inc, + half* a_buffer, const size_t a_offset, const 
size_t a_ld) { + return CUBLAS_STATUS_NOT_SUPPORTED; +} + +// Forwards the cuBLAS calls for SSPR/DSPR +cublasStatus_t cublasXspr(const Layout layout, const cublasFillMode_t triangle, + const size_t n, + const float alpha, + const float* x_buffer, const size_t x_offset, const size_t x_inc, + float* ap_buffer, const size_t ap_offset) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cublasHandle_t handle; + auto status = cublasSspr(handle, triangle, + static_cast<int>(n), + &alpha, + &x_buffer[x_offset], static_cast<int>(x_inc), + &ap_buffer[ap_offset]); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXspr(const Layout layout, const cublasFillMode_t triangle, + const size_t n, + const double alpha, + const double* x_buffer, const size_t x_offset, const size_t x_inc, + double* ap_buffer, const size_t ap_offset) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cublasHandle_t handle; + auto status = cublasDspr(handle, triangle, + static_cast<int>(n), + &alpha, + &x_buffer[x_offset], static_cast<int>(x_inc), + &ap_buffer[ap_offset]); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXspr(const Layout layout, const cublasFillMode_t triangle, + const size_t n, + const half alpha, + const half* x_buffer, const size_t x_offset, const size_t x_inc, + half* ap_buffer, const size_t ap_offset) { + return CUBLAS_STATUS_NOT_SUPPORTED; +} + +// Forwards the cuBLAS calls for SSYR2/DSYR2 +cublasStatus_t cublasXsyr2(const Layout layout, const cublasFillMode_t triangle, + const size_t n, + const float alpha, + const float* x_buffer, const size_t x_offset, const size_t x_inc, + const float* y_buffer, const size_t y_offset, const size_t y_inc, + float* a_buffer, const size_t a_offset, const size_t a_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cublasHandle_t handle; + auto status = cublasSsyr2(handle, triangle, + static_cast<int>(n), + &alpha, + 
&x_buffer[x_offset], static_cast<int>(x_inc), + &y_buffer[y_offset], static_cast<int>(y_inc), + &a_buffer[a_offset], a_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXsyr2(const Layout layout, const cublasFillMode_t triangle, + const size_t n, + const double alpha, + const double* x_buffer, const size_t x_offset, const size_t x_inc, + const double* y_buffer, const size_t y_offset, const size_t y_inc, + double* a_buffer, const size_t a_offset, const size_t a_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cublasHandle_t handle; + auto status = cublasDsyr2(handle, triangle, + static_cast<int>(n), + &alpha, + &x_buffer[x_offset], static_cast<int>(x_inc), + &y_buffer[y_offset], static_cast<int>(y_inc), + &a_buffer[a_offset], a_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXsyr2(const Layout layout, const cublasFillMode_t triangle, + const size_t n, + const half alpha, + const half* x_buffer, const size_t x_offset, const size_t x_inc, + const half* y_buffer, const size_t y_offset, const size_t y_inc, + half* a_buffer, const size_t a_offset, const size_t a_ld) { + return CUBLAS_STATUS_NOT_SUPPORTED; +} + +// Forwards the cuBLAS calls for SSPR2/DSPR2 +cublasStatus_t cublasXspr2(const Layout layout, const cublasFillMode_t triangle, + const size_t n, + const float alpha, + const float* x_buffer, const size_t x_offset, const size_t x_inc, + const float* y_buffer, const size_t y_offset, const size_t y_inc, + float* ap_buffer, const size_t ap_offset) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cublasHandle_t handle; + auto status = cublasSspr2(handle, triangle, + static_cast<int>(n), + &alpha, + &x_buffer[x_offset], static_cast<int>(x_inc), + &y_buffer[y_offset], static_cast<int>(y_inc), + &ap_buffer[ap_offset]); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXspr2(const Layout layout, const cublasFillMode_t triangle, + const size_t n, + 
const double alpha, + const double* x_buffer, const size_t x_offset, const size_t x_inc, + const double* y_buffer, const size_t y_offset, const size_t y_inc, + double* ap_buffer, const size_t ap_offset) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cublasHandle_t handle; + auto status = cublasDspr2(handle, triangle, + static_cast<int>(n), + &alpha, + &x_buffer[x_offset], static_cast<int>(x_inc), + &y_buffer[y_offset], static_cast<int>(y_inc), + &ap_buffer[ap_offset]); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXspr2(const Layout layout, const cublasFillMode_t triangle, + const size_t n, + const half alpha, + const half* x_buffer, const size_t x_offset, const size_t x_inc, + const half* y_buffer, const size_t y_offset, const size_t y_inc, + half* ap_buffer, const size_t ap_offset) { + return CUBLAS_STATUS_NOT_SUPPORTED; +} + +// ================================================================================================= +// BLAS level-3 (matrix-matrix) routines +// ================================================================================================= + +// Forwards the cuBLAS calls for SGEMM/DGEMM/CGEMM/ZGEMM +cublasStatus_t cublasXgemm(const Layout layout, const cublasOperation_t a_transpose, const cublasOperation_t b_transpose, + const size_t m, const size_t n, const size_t k, + const float alpha, + const float* a_buffer, const size_t a_offset, const size_t a_ld, + const float* b_buffer, const size_t b_offset, const size_t b_ld, + const float beta, + float* c_buffer, const size_t c_offset, const size_t c_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cublasHandle_t handle; + auto status = cublasSgemm(handle, a_transpose, b_transpose, + static_cast<int>(m), static_cast<int>(n), static_cast<int>(k), + &alpha, + &a_buffer[a_offset], a_ld, + &b_buffer[b_offset], b_ld, + &beta, + &c_buffer[c_offset], c_ld); + cublasDestroy(handle); + return status; +} 
+cublasStatus_t cublasXgemm(const Layout layout, const cublasOperation_t a_transpose, const cublasOperation_t b_transpose, + const size_t m, const size_t n, const size_t k, + const double alpha, + const double* a_buffer, const size_t a_offset, const size_t a_ld, + const double* b_buffer, const size_t b_offset, const size_t b_ld, + const double beta, + double* c_buffer, const size_t c_offset, const size_t c_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cublasHandle_t handle; + auto status = cublasDgemm(handle, a_transpose, b_transpose, + static_cast<int>(m), static_cast<int>(n), static_cast<int>(k), + &alpha, + &a_buffer[a_offset], a_ld, + &b_buffer[b_offset], b_ld, + &beta, + &c_buffer[c_offset], c_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXgemm(const Layout layout, const cublasOperation_t a_transpose, const cublasOperation_t b_transpose, + const size_t m, const size_t n, const size_t k, + const float2 alpha, + const float2* a_buffer, const size_t a_offset, const size_t a_ld, + const float2* b_buffer, const size_t b_offset, const size_t b_ld, + const float2 beta, + float2* c_buffer, const size_t c_offset, const size_t c_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); + cuComplex beta_cuda; + beta_cuda.x = beta.real(); + beta_cuda.y = beta.imag(); + cublasHandle_t handle; + auto status = cublasCgemm(handle, a_transpose, b_transpose, + static_cast<int>(m), static_cast<int>(n), static_cast<int>(k), + &alpha_cuda, + reinterpret_cast<const cuComplex*>(&a_buffer[a_offset]), a_ld, + reinterpret_cast<const cuComplex*>(&b_buffer[b_offset]), b_ld, + &beta_cuda, + reinterpret_cast<cuComplex*>(&c_buffer[c_offset]), c_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXgemm(const Layout layout, const cublasOperation_t a_transpose, const cublasOperation_t b_transpose, + const 
size_t m, const size_t n, const size_t k, + const double2 alpha, + const double2* a_buffer, const size_t a_offset, const size_t a_ld, + const double2* b_buffer, const size_t b_offset, const size_t b_ld, + const double2 beta, + double2* c_buffer, const size_t c_offset, const size_t c_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuDoubleComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); + cuDoubleComplex beta_cuda; + beta_cuda.x = beta.real(); + beta_cuda.y = beta.imag(); + cublasHandle_t handle; + auto status = cublasZgemm(handle, a_transpose, b_transpose, + static_cast<int>(m), static_cast<int>(n), static_cast<int>(k), + &alpha_cuda, + reinterpret_cast<const cuDoubleComplex*>(&a_buffer[a_offset]), a_ld, + reinterpret_cast<const cuDoubleComplex*>(&b_buffer[b_offset]), b_ld, + &beta_cuda, + reinterpret_cast<cuDoubleComplex*>(&c_buffer[c_offset]), c_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXgemm(const Layout layout, const cublasOperation_t a_transpose, const cublasOperation_t b_transpose, + const size_t m, const size_t n, const size_t k, + const half alpha, + const half* a_buffer, const size_t a_offset, const size_t a_ld, + const half* b_buffer, const size_t b_offset, const size_t b_ld, + const half beta, + half* c_buffer, const size_t c_offset, const size_t c_ld) { + return CUBLAS_STATUS_NOT_SUPPORTED; +} + +// Forwards the cuBLAS calls for SSYMM/DSYMM/CSYMM/ZSYMM +cublasStatus_t cublasXsymm(const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, + const size_t m, const size_t n, + const float alpha, + const float* a_buffer, const size_t a_offset, const size_t a_ld, + const float* b_buffer, const size_t b_offset, const size_t b_ld, + const float beta, + float* c_buffer, const size_t c_offset, const size_t c_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cublasHandle_t handle; + auto status = 
cublasSsymm(handle, side, triangle, + static_cast<int>(m), static_cast<int>(n), + &alpha, + &a_buffer[a_offset], a_ld, + &b_buffer[b_offset], b_ld, + &beta, + &c_buffer[c_offset], c_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXsymm(const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, + const size_t m, const size_t n, + const double alpha, + const double* a_buffer, const size_t a_offset, const size_t a_ld, + const double* b_buffer, const size_t b_offset, const size_t b_ld, + const double beta, + double* c_buffer, const size_t c_offset, const size_t c_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cublasHandle_t handle; + auto status = cublasDsymm(handle, side, triangle, + static_cast<int>(m), static_cast<int>(n), + &alpha, + &a_buffer[a_offset], a_ld, + &b_buffer[b_offset], b_ld, + &beta, + &c_buffer[c_offset], c_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXsymm(const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, + const size_t m, const size_t n, + const float2 alpha, + const float2* a_buffer, const size_t a_offset, const size_t a_ld, + const float2* b_buffer, const size_t b_offset, const size_t b_ld, + const float2 beta, + float2* c_buffer, const size_t c_offset, const size_t c_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); + cuComplex beta_cuda; + beta_cuda.x = beta.real(); + beta_cuda.y = beta.imag(); + cublasHandle_t handle; + auto status = cublasCsymm(handle, side, triangle, + static_cast<int>(m), static_cast<int>(n), + &alpha_cuda, + reinterpret_cast<const cuComplex*>(&a_buffer[a_offset]), a_ld, + reinterpret_cast<const cuComplex*>(&b_buffer[b_offset]), b_ld, + &beta_cuda, + reinterpret_cast<cuComplex*>(&c_buffer[c_offset]), c_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t 
cublasXsymm(const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, + const size_t m, const size_t n, + const double2 alpha, + const double2* a_buffer, const size_t a_offset, const size_t a_ld, + const double2* b_buffer, const size_t b_offset, const size_t b_ld, + const double2 beta, + double2* c_buffer, const size_t c_offset, const size_t c_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuDoubleComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); + cuDoubleComplex beta_cuda; + beta_cuda.x = beta.real(); + beta_cuda.y = beta.imag(); + cublasHandle_t handle; + auto status = cublasZsymm(handle, side, triangle, + static_cast<int>(m), static_cast<int>(n), + &alpha_cuda, + reinterpret_cast<const cuDoubleComplex*>(&a_buffer[a_offset]), a_ld, + reinterpret_cast<const cuDoubleComplex*>(&b_buffer[b_offset]), b_ld, + &beta_cuda, + reinterpret_cast<cuDoubleComplex*>(&c_buffer[c_offset]), c_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXsymm(const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, + const size_t m, const size_t n, + const half alpha, + const half* a_buffer, const size_t a_offset, const size_t a_ld, + const half* b_buffer, const size_t b_offset, const size_t b_ld, + const half beta, + half* c_buffer, const size_t c_offset, const size_t c_ld) { + return CUBLAS_STATUS_NOT_SUPPORTED; +} + +// Forwards the cuBLAS calls for CHEMM/ZHEMM +cublasStatus_t cublasXhemm(const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, + const size_t m, const size_t n, + const float2 alpha, + const float2* a_buffer, const size_t a_offset, const size_t a_ld, + const float2* b_buffer, const size_t b_offset, const size_t b_ld, + const float2 beta, + float2* c_buffer, const size_t c_offset, const size_t c_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuComplex alpha_cuda; + 
alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); + cuComplex beta_cuda; + beta_cuda.x = beta.real(); + beta_cuda.y = beta.imag(); + cublasHandle_t handle; + auto status = cublasChemm(handle, side, triangle, + static_cast<int>(m), static_cast<int>(n), + &alpha_cuda, + reinterpret_cast<const cuComplex*>(&a_buffer[a_offset]), a_ld, + reinterpret_cast<const cuComplex*>(&b_buffer[b_offset]), b_ld, + &beta_cuda, + reinterpret_cast<cuComplex*>(&c_buffer[c_offset]), c_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXhemm(const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, + const size_t m, const size_t n, + const double2 alpha, + const double2* a_buffer, const size_t a_offset, const size_t a_ld, + const double2* b_buffer, const size_t b_offset, const size_t b_ld, + const double2 beta, + double2* c_buffer, const size_t c_offset, const size_t c_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuDoubleComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); + cuDoubleComplex beta_cuda; + beta_cuda.x = beta.real(); + beta_cuda.y = beta.imag(); + cublasHandle_t handle; + auto status = cublasZhemm(handle, side, triangle, + static_cast<int>(m), static_cast<int>(n), + &alpha_cuda, + reinterpret_cast<const cuDoubleComplex*>(&a_buffer[a_offset]), a_ld, + reinterpret_cast<const cuDoubleComplex*>(&b_buffer[b_offset]), b_ld, + &beta_cuda, + reinterpret_cast<cuDoubleComplex*>(&c_buffer[c_offset]), c_ld); + cublasDestroy(handle); + return status; +} + +// Forwards the cuBLAS calls for SSYRK/DSYRK/CSYRK/ZSYRK +cublasStatus_t cublasXsyrk(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, + const size_t n, const size_t k, + const float alpha, + const float* a_buffer, const size_t a_offset, const size_t a_ld, + const float beta, + float* c_buffer, const size_t c_offset, const size_t c_ld) { + if (layout == Layout::kRowMajor) { 
return CUBLAS_STATUS_NOT_SUPPORTED; } + cublasHandle_t handle; + auto status = cublasSsyrk(handle, triangle, a_transpose, + static_cast<int>(n), static_cast<int>(k), + &alpha, + &a_buffer[a_offset], a_ld, + &beta, + &c_buffer[c_offset], c_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXsyrk(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, + const size_t n, const size_t k, + const double alpha, + const double* a_buffer, const size_t a_offset, const size_t a_ld, + const double beta, + double* c_buffer, const size_t c_offset, const size_t c_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cublasHandle_t handle; + auto status = cublasDsyrk(handle, triangle, a_transpose, + static_cast<int>(n), static_cast<int>(k), + &alpha, + &a_buffer[a_offset], a_ld, + &beta, + &c_buffer[c_offset], c_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXsyrk(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, + const size_t n, const size_t k, + const float2 alpha, + const float2* a_buffer, const size_t a_offset, const size_t a_ld, + const float2 beta, + float2* c_buffer, const size_t c_offset, const size_t c_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); + cuComplex beta_cuda; + beta_cuda.x = beta.real(); + beta_cuda.y = beta.imag(); + cublasHandle_t handle; + auto status = cublasCsyrk(handle, triangle, a_transpose, + static_cast<int>(n), static_cast<int>(k), + &alpha_cuda, + reinterpret_cast<const cuComplex*>(&a_buffer[a_offset]), a_ld, + &beta_cuda, + reinterpret_cast<cuComplex*>(&c_buffer[c_offset]), c_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXsyrk(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, + const size_t n, const size_t k, + 
const double2 alpha, + const double2* a_buffer, const size_t a_offset, const size_t a_ld, + const double2 beta, + double2* c_buffer, const size_t c_offset, const size_t c_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuDoubleComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); + cuDoubleComplex beta_cuda; + beta_cuda.x = beta.real(); + beta_cuda.y = beta.imag(); + cublasHandle_t handle; + auto status = cublasZsyrk(handle, triangle, a_transpose, + static_cast<int>(n), static_cast<int>(k), + &alpha_cuda, + reinterpret_cast<const cuDoubleComplex*>(&a_buffer[a_offset]), a_ld, + &beta_cuda, + reinterpret_cast<cuDoubleComplex*>(&c_buffer[c_offset]), c_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXsyrk(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, + const size_t n, const size_t k, + const half alpha, + const half* a_buffer, const size_t a_offset, const size_t a_ld, + const half beta, + half* c_buffer, const size_t c_offset, const size_t c_ld) { + return CUBLAS_STATUS_NOT_SUPPORTED; +} + +// Forwards the cuBLAS calls for CHERK/ZHERK +cublasStatus_t cublasXherk(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, + const size_t n, const size_t k, + const float alpha, + const float2* a_buffer, const size_t a_offset, const size_t a_ld, + const float beta, + float2* c_buffer, const size_t c_offset, const size_t c_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cublasHandle_t handle; + auto status = cublasCherk(handle, triangle, a_transpose, + static_cast<int>(n), static_cast<int>(k), + &alpha, + reinterpret_cast<const cuComplex*>(&a_buffer[a_offset]), a_ld, + &beta, + reinterpret_cast<cuComplex*>(&c_buffer[c_offset]), c_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXherk(const Layout layout, const cublasFillMode_t triangle, const 
cublasOperation_t a_transpose, + const size_t n, const size_t k, + const double alpha, + const double2* a_buffer, const size_t a_offset, const size_t a_ld, + const double beta, + double2* c_buffer, const size_t c_offset, const size_t c_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cublasHandle_t handle; + auto status = cublasZherk(handle, triangle, a_transpose, + static_cast<int>(n), static_cast<int>(k), + &alpha, + reinterpret_cast<const cuDoubleComplex*>(&a_buffer[a_offset]), a_ld, + &beta, + reinterpret_cast<cuDoubleComplex*>(&c_buffer[c_offset]), c_ld); + cublasDestroy(handle); + return status; +} + +// Forwards the cuBLAS calls for SSYR2K/DSYR2K/CSYR2K/ZSYR2K +cublasStatus_t cublasXsyr2k(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t ab_transpose, + const size_t n, const size_t k, + const float alpha, + const float* a_buffer, const size_t a_offset, const size_t a_ld, + const float* b_buffer, const size_t b_offset, const size_t b_ld, + const float beta, + float* c_buffer, const size_t c_offset, const size_t c_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cublasHandle_t handle; + auto status = cublasSsyr2k(handle, triangle, ab_transpose, + static_cast<int>(n), static_cast<int>(k), + &alpha, + &a_buffer[a_offset], a_ld, + &b_buffer[b_offset], b_ld, + &beta, + &c_buffer[c_offset], c_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXsyr2k(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t ab_transpose, + const size_t n, const size_t k, + const double alpha, + const double* a_buffer, const size_t a_offset, const size_t a_ld, + const double* b_buffer, const size_t b_offset, const size_t b_ld, + const double beta, + double* c_buffer, const size_t c_offset, const size_t c_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cublasHandle_t handle; + auto status = cublasDsyr2k(handle, 
triangle, ab_transpose, + static_cast<int>(n), static_cast<int>(k), + &alpha, + &a_buffer[a_offset], a_ld, + &b_buffer[b_offset], b_ld, + &beta, + &c_buffer[c_offset], c_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXsyr2k(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t ab_transpose, + const size_t n, const size_t k, + const float2 alpha, + const float2* a_buffer, const size_t a_offset, const size_t a_ld, + const float2* b_buffer, const size_t b_offset, const size_t b_ld, + const float2 beta, + float2* c_buffer, const size_t c_offset, const size_t c_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); + cuComplex beta_cuda; + beta_cuda.x = beta.real(); + beta_cuda.y = beta.imag(); + cublasHandle_t handle; + auto status = cublasCsyr2k(handle, triangle, ab_transpose, + static_cast<int>(n), static_cast<int>(k), + &alpha_cuda, + reinterpret_cast<const cuComplex*>(&a_buffer[a_offset]), a_ld, + reinterpret_cast<const cuComplex*>(&b_buffer[b_offset]), b_ld, + &beta_cuda, + reinterpret_cast<cuComplex*>(&c_buffer[c_offset]), c_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXsyr2k(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t ab_transpose, + const size_t n, const size_t k, + const double2 alpha, + const double2* a_buffer, const size_t a_offset, const size_t a_ld, + const double2* b_buffer, const size_t b_offset, const size_t b_ld, + const double2 beta, + double2* c_buffer, const size_t c_offset, const size_t c_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuDoubleComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); + cuDoubleComplex beta_cuda; + beta_cuda.x = beta.real(); + beta_cuda.y = beta.imag(); + cublasHandle_t handle; + auto status = cublasZsyr2k(handle, triangle, ab_transpose, + 
static_cast<int>(n), static_cast<int>(k), + &alpha_cuda, + reinterpret_cast<const cuDoubleComplex*>(&a_buffer[a_offset]), a_ld, + reinterpret_cast<const cuDoubleComplex*>(&b_buffer[b_offset]), b_ld, + &beta_cuda, + reinterpret_cast<cuDoubleComplex*>(&c_buffer[c_offset]), c_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXsyr2k(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t ab_transpose, + const size_t n, const size_t k, + const half alpha, + const half* a_buffer, const size_t a_offset, const size_t a_ld, + const half* b_buffer, const size_t b_offset, const size_t b_ld, + const half beta, + half* c_buffer, const size_t c_offset, const size_t c_ld) { + return CUBLAS_STATUS_NOT_SUPPORTED; +} + +// Forwards the cuBLAS calls for CHER2K/ZHER2K +cublasStatus_t cublasXher2k(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t ab_transpose, + const size_t n, const size_t k, + const float2 alpha, + const float2* a_buffer, const size_t a_offset, const size_t a_ld, + const float2* b_buffer, const size_t b_offset, const size_t b_ld, + const float beta, + float2* c_buffer, const size_t c_offset, const size_t c_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); + cublasHandle_t handle; + auto status = cublasCher2k(handle, triangle, ab_transpose, + static_cast<int>(n), static_cast<int>(k), + &alpha_cuda, + reinterpret_cast<const cuComplex*>(&a_buffer[a_offset]), a_ld, + reinterpret_cast<const cuComplex*>(&b_buffer[b_offset]), b_ld, + &beta, + reinterpret_cast<cuComplex*>(&c_buffer[c_offset]), c_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXher2k(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t ab_transpose, + const size_t n, const size_t k, + const double2 alpha, + const double2* a_buffer, const size_t a_offset, const size_t a_ld, + 
const double2* b_buffer, const size_t b_offset, const size_t b_ld, + const double beta, + double2* c_buffer, const size_t c_offset, const size_t c_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuDoubleComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); + cublasHandle_t handle; + auto status = cublasZher2k(handle, triangle, ab_transpose, + static_cast<int>(n), static_cast<int>(k), + &alpha_cuda, + reinterpret_cast<const cuDoubleComplex*>(&a_buffer[a_offset]), a_ld, + reinterpret_cast<const cuDoubleComplex*>(&b_buffer[b_offset]), b_ld, + &beta, + reinterpret_cast<cuDoubleComplex*>(&c_buffer[c_offset]), c_ld); + cublasDestroy(handle); + return status; +} + +// Forwards the cuBLAS calls for STRMM/DTRMM/CTRMM/ZTRMM +cublasStatus_t cublasXtrmm(const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t m, const size_t n, + const float alpha, + const float* a_buffer, const size_t a_offset, const size_t a_ld, + float* b_buffer, const size_t b_offset, const size_t b_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cublasHandle_t handle; + auto status = cublasStrmm(handle, side, triangle, a_transpose, diagonal, + static_cast<int>(m), static_cast<int>(n), + &alpha, + &a_buffer[a_offset], a_ld, + &b_buffer[b_offset], b_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXtrmm(const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t m, const size_t n, + const double alpha, + const double* a_buffer, const size_t a_offset, const size_t a_ld, + double* b_buffer, const size_t b_offset, const size_t b_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cublasHandle_t handle; + auto status = cublasDtrmm(handle, side, 
triangle, a_transpose, diagonal, + static_cast<int>(m), static_cast<int>(n), + &alpha, + &a_buffer[a_offset], a_ld, + &b_buffer[b_offset], b_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXtrmm(const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t m, const size_t n, + const float2 alpha, + const float2* a_buffer, const size_t a_offset, const size_t a_ld, + float2* b_buffer, const size_t b_offset, const size_t b_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); + cublasHandle_t handle; + auto status = cublasCtrmm(handle, side, triangle, a_transpose, diagonal, + static_cast<int>(m), static_cast<int>(n), + &alpha_cuda, + reinterpret_cast<const cuComplex*>(&a_buffer[a_offset]), a_ld, + reinterpret_cast<cuComplex*>(&b_buffer[b_offset]), b_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXtrmm(const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t m, const size_t n, + const double2 alpha, + const double2* a_buffer, const size_t a_offset, const size_t a_ld, + double2* b_buffer, const size_t b_offset, const size_t b_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuDoubleComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); + cublasHandle_t handle; + auto status = cublasZtrmm(handle, side, triangle, a_transpose, diagonal, + static_cast<int>(m), static_cast<int>(n), + &alpha_cuda, + reinterpret_cast<const cuDoubleComplex*>(&a_buffer[a_offset]), a_ld, + reinterpret_cast<cuDoubleComplex*>(&b_buffer[b_offset]), b_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXtrmm(const Layout layout, const 
cublasSideMode_t side, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t m, const size_t n, + const half alpha, + const half* a_buffer, const size_t a_offset, const size_t a_ld, + half* b_buffer, const size_t b_offset, const size_t b_ld) { + return CUBLAS_STATUS_NOT_SUPPORTED; +} + +// Forwards the cuBLAS calls for STRSM/DTRSM/CTRSM/ZTRSM +cublasStatus_t cublasXtrsm(const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t m, const size_t n, + const float alpha, + const float* a_buffer, const size_t a_offset, const size_t a_ld, + float* b_buffer, const size_t b_offset, const size_t b_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cublasHandle_t handle; + auto status = cublasStrsm(handle, side, triangle, a_transpose, diagonal, + static_cast<int>(m), static_cast<int>(n), + &alpha, + &a_buffer[a_offset], a_ld, + &b_buffer[b_offset], b_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXtrsm(const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t m, const size_t n, + const double alpha, + const double* a_buffer, const size_t a_offset, const size_t a_ld, + double* b_buffer, const size_t b_offset, const size_t b_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cublasHandle_t handle; + auto status = cublasDtrsm(handle, side, triangle, a_transpose, diagonal, + static_cast<int>(m), static_cast<int>(n), + &alpha, + &a_buffer[a_offset], a_ld, + &b_buffer[b_offset], b_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXtrsm(const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const 
size_t m, const size_t n, + const float2 alpha, + const float2* a_buffer, const size_t a_offset, const size_t a_ld, + float2* b_buffer, const size_t b_offset, const size_t b_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); + cublasHandle_t handle; + auto status = cublasCtrsm(handle, side, triangle, a_transpose, diagonal, + static_cast<int>(m), static_cast<int>(n), + &alpha_cuda, + reinterpret_cast<const cuComplex*>(&a_buffer[a_offset]), a_ld, + reinterpret_cast<cuComplex*>(&b_buffer[b_offset]), b_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXtrsm(const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t m, const size_t n, + const double2 alpha, + const double2* a_buffer, const size_t a_offset, const size_t a_ld, + double2* b_buffer, const size_t b_offset, const size_t b_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuDoubleComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); + cublasHandle_t handle; + auto status = cublasZtrsm(handle, side, triangle, a_transpose, diagonal, + static_cast<int>(m), static_cast<int>(n), + &alpha_cuda, + reinterpret_cast<const cuDoubleComplex*>(&a_buffer[a_offset]), a_ld, + reinterpret_cast<cuDoubleComplex*>(&b_buffer[b_offset]), b_ld); + cublasDestroy(handle); + return status; +} + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_TEST_WRAPPER_CUBLAS_H_ +#endif diff --git a/test/wrapper_cuda.hpp b/test/wrapper_cuda.hpp new file mode 100644 index 00000000..509de9d1 --- /dev/null +++ b/test/wrapper_cuda.hpp @@ -0,0 +1,111 @@ + +// ================================================================================================= +// 
This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file contains all the CUDA related code; used only in case of testing against cuBLAS +// +// ================================================================================================= + +#ifndef CLBLAST_TEST_WRAPPER_CUDA_H_ +#define CLBLAST_TEST_WRAPPER_CUDA_H_ + +#include <string> +#include <vector> +#include <memory> +#include <stdexcept> + +#include "utilities/utilities.hpp" + +#ifdef CLBLAST_REF_CUBLAS + #include <cuda_runtime.h> + #include <cublas_v2.h> +#endif + +namespace clblast { +// ================================================================================================= + +// Copies data from the CUDA device to the host and frees-up the CUDA memory afterwards +#ifdef CLBLAST_REF_CUBLAS + template <typename T> + void CUDAToHost(T* buffer_cuda, std::vector<T> &buffer_host, const size_t size) { + cudaMemcpy( + reinterpret_cast<void*>(buffer_host.data()), + reinterpret_cast<void*>(buffer_cuda), + size*sizeof(T), + cudaMemcpyDeviceToHost + ); + cudaFree(buffer_cuda); +} +#else + template <typename T> void CUDAToHost(T*, const std::vector<T>&, const size_t) { } +#endif + +// Allocates space on the CUDA device and copies in data from the host +#ifdef CLBLAST_REF_CUBLAS + template <typename T> + void HostToCUDA(T* buffer_cuda, std::vector<T> &buffer_host, const size_t size) { + cudaMalloc(reinterpret_cast<void**>(&buffer_cuda), size*sizeof(T)); + cudaMemcpy( + reinterpret_cast<void*>(buffer_cuda), + reinterpret_cast<void*>(buffer_host.data()), + size*sizeof(T), + cudaMemcpyHostToDevice + ); + } +#else + template <typename T> void HostToCUDA(T*, const std::vector<T>&, const size_t) { } +#endif + +// 
================================================================================================= + +template <typename T> +struct BuffersCUDA { + T* x_vec; + T* y_vec; + T* a_mat; + T* b_mat; + T* c_mat; + T* ap_mat; + T* scalar; +}; + +template <typename T, typename U> +void CUDAToHost(const Arguments<U> &args, BuffersCUDA<T> &buffers, BuffersHost<T> &buffers_host, + const std::vector<std::string> &names) { + for (auto &name: names) { + if (name == kBufVecX) { buffers_host.x_vec = std::vector<T>(args.x_size, static_cast<T>(0)); CUDAToHost(buffers.x_vec, buffers_host.x_vec, args.x_size); } + else if (name == kBufVecY) { buffers_host.y_vec = std::vector<T>(args.y_size, static_cast<T>(0)); CUDAToHost(buffers.y_vec, buffers_host.y_vec, args.y_size); } + else if (name == kBufMatA) { buffers_host.a_mat = std::vector<T>(args.a_size, static_cast<T>(0)); CUDAToHost(buffers.a_mat, buffers_host.a_mat, args.a_size); } + else if (name == kBufMatB) { buffers_host.b_mat = std::vector<T>(args.b_size, static_cast<T>(0)); CUDAToHost(buffers.b_mat, buffers_host.b_mat, args.b_size); } + else if (name == kBufMatC) { buffers_host.c_mat = std::vector<T>(args.c_size, static_cast<T>(0)); CUDAToHost(buffers.c_mat, buffers_host.c_mat, args.c_size); } + else if (name == kBufMatAP) { buffers_host.ap_mat = std::vector<T>(args.ap_size, static_cast<T>(0)); CUDAToHost(buffers.ap_mat, buffers_host.ap_mat, args.ap_size); } + else if (name == kBufScalar) { buffers_host.scalar = std::vector<T>(args.scalar_size, static_cast<T>(0)); CUDAToHost(buffers.scalar, buffers_host.scalar, args.scalar_size); } + else { throw std::runtime_error("Invalid buffer name"); } + } +} + +template <typename T, typename U> +void HostToCUDA(const Arguments<U> &args, BuffersCUDA<T> &buffers, BuffersHost<T> &buffers_host, + const std::vector<std::string> &names) { + for (auto &name: names) { + if (name == kBufVecX) { HostToCUDA(buffers.x_vec, buffers_host.x_vec, args.x_size); } + else if (name == kBufVecY) { 
HostToCUDA(buffers.y_vec, buffers_host.y_vec, args.y_size); } + else if (name == kBufMatA) { HostToCUDA(buffers.a_mat, buffers_host.a_mat, args.a_size); } + else if (name == kBufMatB) { HostToCUDA(buffers.b_mat, buffers_host.b_mat, args.b_size); } + else if (name == kBufMatC) { HostToCUDA(buffers.c_mat, buffers_host.c_mat, args.c_size); } + else if (name == kBufMatAP) { HostToCUDA(buffers.ap_mat, buffers_host.ap_mat, args.ap_size); } + else if (name == kBufScalar) { HostToCUDA(buffers.scalar, buffers_host.scalar, args.scalar_size); } + else { throw std::runtime_error("Invalid buffer name"); } + } +} + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_TEST_WRAPPER_CUDA_H_ +#endif |