From 3876096c30ad4eed5769dbc88dbfe75b7571718a Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Fri, 25 Mar 2016 10:00:40 +0100 Subject: Added prototypes for SNRM2/DNRM2 routines --- test/correctness/routines/level1/xnrm2.cc | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 test/correctness/routines/level1/xnrm2.cc (limited to 'test/correctness/routines') diff --git a/test/correctness/routines/level1/xnrm2.cc b/test/correctness/routines/level1/xnrm2.cc new file mode 100644 index 00000000..8238e868 --- /dev/null +++ b/test/correctness/routines/level1/xnrm2.cc @@ -0,0 +1,26 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// ================================================================================================= + +#include "correctness/testblas.h" +#include "routines/level1/xnrm2.h" + +// Shortcuts to the clblast namespace +using float2 = clblast::float2; +using double2 = clblast::double2; + +// Main function (not within the clblast namespace) +int main(int argc, char *argv[]) { + clblast::RunTests<clblast::TestXnrm2<float>, float, float>(argc, argv, false, "SNRM2"); + clblast::RunTests<clblast::TestXnrm2<double>, double, double>(argc, argv, true, "DNRM2"); + return 0; +} + +// ================================================================================================= -- cgit v1.2.3 From 1d5a702d9d31afa320a15ed9fa79471aec314f4a Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Fri, 25 Mar 2016 10:30:38 +0100 Subject: Added prototypes for ScNRM2/DzNRM2 routines --- include/clblast.h | 2 +- include/clblast_c.h | 10 +++++++++- scripts/generator/datatype.py | 3 ++- scripts/generator/generator.py | 28 ++++++++++++++-------------
src/clblast.cc | 10 +++++++++- src/clblast_c.cc | 20 +++++++++++++++++++ test/correctness/routines/level1/xnrm2.cc | 2 ++ test/performance/routines/level1/xnrm2.cc | 6 ++++-- test/wrapper_clblas.h | 32 ++++++++++++++++++++++++++++++- 9 files changed, 93 insertions(+), 20 deletions(-) (limited to 'test/correctness/routines') diff --git a/include/clblast.h b/include/clblast.h index 905de774..d837cb71 100644 --- a/include/clblast.h +++ b/include/clblast.h @@ -140,7 +140,7 @@ StatusCode Dotc(const size_t n, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_command_queue* queue, cl_event* event); -// Euclidian norm of a vector: SNRM2/DNRM2 +// Euclidian norm of a vector: SNRM2/DNRM2/ScNRM2/DzNRM2 template <typename T> StatusCode Nrm2(const size_t n, cl_mem nrm2_buffer, const size_t nrm2_offset, diff --git a/include/clblast_c.h b/include/clblast_c.h index 1e4be1ab..e93ee465 100644 --- a/include/clblast_c.h +++ b/include/clblast_c.h @@ -208,7 +208,7 @@ StatusCode PUBLIC_API CLBlastZdotc(const size_t n, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_command_queue* queue, cl_event* event); -// Euclidian norm of a vector: SNRM2/DNRM2 +// Euclidian norm of a vector: SNRM2/DNRM2/ScNRM2/DzNRM2 StatusCode PUBLIC_API CLBlastSnrm2(const size_t n, cl_mem nrm2_buffer, const size_t nrm2_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, @@ -217,6 +217,14 @@ StatusCode PUBLIC_API CLBlastDnrm2(const size_t n, cl_mem nrm2_buffer, const size_t nrm2_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_command_queue* queue, cl_event* event); +StatusCode PUBLIC_API CLBlastScnrm2(const size_t n, + cl_mem nrm2_buffer, const size_t nrm2_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); +StatusCode PUBLIC_API CLBlastDznrm2(const size_t n, + cl_mem nrm2_buffer, const size_t nrm2_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t
x_inc, + cl_command_queue* queue, cl_event* event); // ================================================================================================= // BLAS level-2 (matrix-vector) routines diff --git a/scripts/generator/datatype.py b/scripts/generator/datatype.py index 0aa27197..9323bc4d 100644 --- a/scripts/generator/datatype.py +++ b/scripts/generator/datatype.py @@ -22,7 +22,8 @@ D2CL = "cl_double2" # Structure holding data-type and precision information class DataType(): - def __init__(self, name, template, scalars, buffertype): + def __init__(self, precision_name, name, template, scalars, buffertype): + self.precision_name = precision_name self.name = name self.template = template self.alpha_cpp = scalars[0] diff --git a/scripts/generator/generator.py b/scripts/generator/generator.py index 2c22a6fd..253f1a92 100644 --- a/scripts/generator/generator.py +++ b/scripts/generator/generator.py @@ -31,21 +31,23 @@ from datatype import DataType, FLT, DBL, FLT2, DBL2, F2CL, D2CL # ================================================================================================== # Regular data-types -S = DataType("S", FLT, [FLT, FLT, FLT, FLT], FLT ) # single (32) -D = DataType("D", DBL, [DBL, DBL, DBL, DBL], DBL ) # double (64) -C = DataType("C", FLT2, [FLT2, FLT2, F2CL, F2CL], FLT2) # single-complex (3232) -Z = DataType("Z", DBL2, [DBL2, DBL2, D2CL, D2CL], DBL2) # double-complex (6464) +S = DataType("S", "S", FLT, [FLT, FLT, FLT, FLT], FLT ) # single (32) +D = DataType("D", "D", DBL, [DBL, DBL, DBL, DBL], DBL ) # double (64) +C = DataType("C", "C", FLT2, [FLT2, FLT2, F2CL, F2CL], FLT2) # single-complex (3232) +Z = DataType("Z", "Z", DBL2, [DBL2, DBL2, D2CL, D2CL], DBL2) # double-complex (6464) # Special cases -Css = DataType("C", FLT, [FLT, FLT, FLT, FLT], FLT2) # As C, but with constants from S -Zdd = DataType("Z", DBL, [DBL, DBL, DBL, DBL], DBL2) # As Z, but with constants from D -Ccs = DataType("C", FLT2+","+FLT, [FLT2, FLT, F2CL, FLT], FLT2) # As C, but with 
one constant from S -Zzd = DataType("Z", DBL2+","+DBL, [DBL2, DBL, D2CL, DBL], DBL2) # As Z, but with one constant from D +Sc = DataType("C", "Sc", FLT2, [FLT2, FLT2, FLT2, FLT2], FLT2) # As C, but with real output +Dz = DataType("Z", "Dz", DBL2, [DBL2, DBL2, DBL2, DBL2], DBL2) # As Z, but with real output +Css = DataType("C", "C", FLT, [FLT, FLT, FLT, FLT], FLT2) # As C, but with constants from S +Zdd = DataType("Z", "Z", DBL, [DBL, DBL, DBL, DBL], DBL2) # As Z, but with constants from D +Ccs = DataType("C", "C", FLT2+","+FLT, [FLT2, FLT, F2CL, FLT], FLT2) # As C, but with one constant from S +Zzd = DataType("Z", "Z", DBL2+","+DBL, [DBL2, DBL, D2CL, DBL], DBL2) # As Z, but with one constant from D # C++ template data-types -T = DataType("typename T", "T", ["T", "T", "T", "T"], "T") # regular routine -Tc = DataType("typename T", "std::complex<T>,T", ["T", "T", "T", "T"], "std::complex<T>") # for herk -TU = DataType("typename T, typename U", "T,U", ["T", "U", "T", "U"], "T") # for her2k +T = DataType("T", "typename T", "T", ["T", "T", "T", "T"], "T") # regular routine +Tc = DataType("Tc", "typename T", "std::complex<T>,T", ["T", "T", "T", "T"], "std::complex<T>") # for herk +TU = DataType("TU", "typename T, typename U", "T,U", ["T", "U", "T", "U"], "T") # for her2k # ================================================================================================== @@ -61,7 +63,7 @@ routines = [ Routine(True, "1", "dot", T, [S,D], ["n"], [], ["x","y"], ["dot"], [], True, "Dot product of two vectors"), Routine(True, "1", "dotu", T, [C,Z], ["n"], [], ["x","y"], ["dot"], [], True, "Dot product of two complex vectors"), Routine(True, "1", "dotc", T, [C,Z], ["n"], [], ["x","y"], ["dot"], [], True, "Dot product of two complex vectors, one conjugated"), - Routine(False, "1", "nrm2", T, [S,D], ["n"], [], ["x"], ["nrm2"], [], True, "Euclidian norm of a vector"), + Routine(False, "1", "nrm2", T, [S,D,Sc,Dz],["n"], [], ["x"], ["nrm2"], [], True, "Euclidian norm of a vector"), ], [ # Level
2: matrix-vector Routine(True, "2a", "gemv", T, [S,D,C,Z], ["m","n"], ["layout","a_transpose"], ["a","x"], ["y"], ["alpha","beta"], False, "General matrix-vector multiplication"), @@ -332,7 +334,7 @@ for level in [1,2,3]: body += " case clblast::Precision::k"+PrecisionToFullName(precision)+":" found = False for flavour in routine.flavours: - if flavour.name == precision: + if flavour.precision_name == precision: body += "\n clblast::RunClient(const size_t, const cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); -// Euclidian norm of a vector: SNRM2/DNRM2 +// Euclidian norm of a vector: SNRM2/DNRM2/ScNRM2/DzNRM2 template <typename T> StatusCode Nrm2(const size_t, cl_mem, const size_t, @@ -301,6 +301,14 @@ template StatusCode PUBLIC_API Nrm2<double>(const size_t, cl_mem, const size_t, const cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); +template StatusCode PUBLIC_API Nrm2<float2>(const size_t, + cl_mem, const size_t, + const cl_mem, const size_t, const size_t, + cl_command_queue*, cl_event*); +template StatusCode PUBLIC_API Nrm2<double2>(const size_t, + cl_mem, const size_t, + const cl_mem, const size_t, const size_t, + cl_command_queue*, cl_event*); // ================================================================================================= // BLAS level-2 (matrix-vector) routines diff --git a/src/clblast_c.cc b/src/clblast_c.cc index d735fa35..fa25d4a7 100644 --- a/src/clblast_c.cc +++ b/src/clblast_c.cc @@ -300,6 +300,26 @@ StatusCode CLBlastDnrm2(const size_t n, queue, event); return static_cast<StatusCode>(status); } +StatusCode CLBlastScnrm2(const size_t n, + cl_mem nrm2_buffer, const size_t nrm2_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event) { + auto status = clblast::Nrm2<float2>(n, + nrm2_buffer, nrm2_offset, + x_buffer, x_offset, x_inc, + queue, event); + return static_cast<StatusCode>(status); +} +StatusCode CLBlastDznrm2(const size_t n, + cl_mem nrm2_buffer, const size_t nrm2_offset, + const cl_mem
x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event) { + auto status = clblast::Nrm2<double2>(n, + nrm2_buffer, nrm2_offset, + x_buffer, x_offset, x_inc, + queue, event); + return static_cast<StatusCode>(status); +} // ================================================================================================= // BLAS level-2 (matrix-vector) routines diff --git a/test/correctness/routines/level1/xnrm2.cc b/test/correctness/routines/level1/xnrm2.cc index 8238e868..97fb0ad6 100644 --- a/test/correctness/routines/level1/xnrm2.cc +++ b/test/correctness/routines/level1/xnrm2.cc @@ -20,6 +20,8 @@ using double2 = clblast::double2; int main(int argc, char *argv[]) { clblast::RunTests<clblast::TestXnrm2<float>, float, float>(argc, argv, false, "SNRM2"); clblast::RunTests<clblast::TestXnrm2<double>, double, double>(argc, argv, true, "DNRM2"); + clblast::RunTests<clblast::TestXnrm2<float2>, float2, float2>(argc, argv, true, "ScNRM2"); + clblast::RunTests<clblast::TestXnrm2<double2>, double2, double2>(argc, argv, true, "DzNRM2"); return 0; } diff --git a/test/performance/routines/level1/xnrm2.cc b/test/performance/routines/level1/xnrm2.cc index d5ae348b..db6ec9ad 100644 --- a/test/performance/routines/level1/xnrm2.cc +++ b/test/performance/routines/level1/xnrm2.cc @@ -24,8 +24,10 @@ int main(int argc, char *argv[]) { clblast::RunClient<clblast::TestXnrm2<float>, float, float>(argc, argv); break; case clblast::Precision::kDouble: clblast::RunClient<clblast::TestXnrm2<double>, double, double>(argc, argv); break; - case clblast::Precision::kComplexSingle: throw std::runtime_error("Unsupported precision mode"); - case clblast::Precision::kComplexDouble: throw std::runtime_error("Unsupported precision mode"); + case clblast::Precision::kComplexSingle: + clblast::RunClient<clblast::TestXnrm2<float2>, float2, float2>(argc, argv); break; + case clblast::Precision::kComplexDouble: + clblast::RunClient<clblast::TestXnrm2<double2>, double2, double2>(argc, argv); break; } return 0; } diff --git a/test/wrapper_clblas.h b/test/wrapper_clblas.h index 501f0bc5..37d9eee5 100644 --- a/test/wrapper_clblas.h +++ b/test/wrapper_clblas.h @@ -350,7 +350,7 @@ clblasStatus
clblasXdotc<double2>(const size_t n, num_queues, queues, num_wait_events, wait_events, events); } -// Forwards the clBLAS calls for SNRM2/DNRM2 +// Forwards the clBLAS calls for SNRM2/DNRM2/ScNRM2/DzNRM2 template <typename T> clblasStatus clblasXnrm2(const size_t n, cl_mem nrm2_buffer, const size_t nrm2_offset, @@ -387,6 +387,36 @@ clblasStatus clblasXnrm2<double>(const size_t n, scratch_buffer(), num_queues, queues, num_wait_events, wait_events, events); } +template <> +clblasStatus clblasXnrm2<float2>(const size_t n, + cl_mem nrm2_buffer, const size_t nrm2_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + auto queue = Queue(queues[0]); + auto context = queue.GetContext(); + auto scratch_buffer = Buffer<float2>(context, n*x_inc + x_offset); + return clblasScnrm2(n, + nrm2_buffer, nrm2_offset, + x_buffer, x_offset, static_cast<int>(x_inc), + scratch_buffer(), + num_queues, queues, num_wait_events, wait_events, events); +} +template <> +clblasStatus clblasXnrm2<double2>(const size_t n, + cl_mem nrm2_buffer, const size_t nrm2_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + auto queue = Queue(queues[0]); + auto context = queue.GetContext(); + auto scratch_buffer = Buffer<double2>(context, n*x_inc + x_offset); + return clblasDznrm2(n, + nrm2_buffer, nrm2_offset, + x_buffer, x_offset, static_cast<int>(x_inc), + scratch_buffer(), + num_queues, queues, num_wait_events, wait_events, events); +} // ================================================================================================= // BLAS level-2 (matrix-vector) routines -- cgit v1.2.3