diff options
-rw-r--r-- | include/clblast.h | 7 | ||||
-rw-r--r-- | include/clblast_c.h | 10 | ||||
-rw-r--r-- | scripts/generator/generator.py | 5 | ||||
-rw-r--r-- | scripts/generator/routine.py | 14 | ||||
-rw-r--r-- | src/clblast.cc | 17 | ||||
-rw-r--r-- | src/clblast_c.cc | 22 | ||||
-rw-r--r-- | test/correctness/routines/level1/xnrm2.cc | 26 | ||||
-rw-r--r-- | test/performance/routines/level1/xnrm2.cc | 33 | ||||
-rw-r--r-- | test/wrapper_clblas.h | 38 |
9 files changed, 163 insertions, 9 deletions
diff --git a/include/clblast.h b/include/clblast.h index 70a3b5bc..905de774 100644 --- a/include/clblast.h +++ b/include/clblast.h @@ -140,6 +140,13 @@ StatusCode Dotc(const size_t n, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_command_queue* queue, cl_event* event); +// Euclidian norm of a vector: SNRM2/DNRM2 +template <typename T> +StatusCode Nrm2(const size_t n, + cl_mem nrm2_buffer, const size_t nrm2_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); + // ================================================================================================= // BLAS level-2 (matrix-vector) routines // ================================================================================================= diff --git a/include/clblast_c.h b/include/clblast_c.h index c5395e51..1e4be1ab 100644 --- a/include/clblast_c.h +++ b/include/clblast_c.h @@ -208,6 +208,16 @@ StatusCode PUBLIC_API CLBlastZdotc(const size_t n, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_command_queue* queue, cl_event* event); +// Euclidian norm of a vector: SNRM2/DNRM2 +StatusCode PUBLIC_API CLBlastSnrm2(const size_t n, + cl_mem nrm2_buffer, const size_t nrm2_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); +StatusCode PUBLIC_API CLBlastDnrm2(const size_t n, + cl_mem nrm2_buffer, const size_t nrm2_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); + // ================================================================================================= // BLAS level-2 (matrix-vector) routines // ================================================================================================= diff --git a/scripts/generator/generator.py b/scripts/generator/generator.py index 8ff5e130..2c22a6fd 100644 --- a/scripts/generator/generator.py +++ 
b/scripts/generator/generator.py @@ -61,6 +61,7 @@ routines = [ Routine(True, "1", "dot", T, [S,D], ["n"], [], ["x","y"], ["dot"], [], True, "Dot product of two vectors"), Routine(True, "1", "dotu", T, [C,Z], ["n"], [], ["x","y"], ["dot"], [], True, "Dot product of two complex vectors"), Routine(True, "1", "dotc", T, [C,Z], ["n"], [], ["x","y"], ["dot"], [], True, "Dot product of two complex vectors, one conjugated"), + Routine(False, "1", "nrm2", T, [S,D], ["n"], [], ["x"], ["nrm2"], [], True, "Euclidian norm of a vector"), ], [ # Level 2: matrix-vector Routine(True, "2a", "gemv", T, [S,D,C,Z], ["m","n"], ["layout","a_transpose"], ["a","x"], ["y"], ["alpha","beta"], False, "General matrix-vector multiplication"), @@ -247,8 +248,8 @@ files = [ path_clblast+"/src/clblast_c.cc", path_clblast+"/test/wrapper_clblas.h", ] -header_lines = [84, 64, 88, 24, 22] -footer_lines = [6, 3, 5, 2, 6] +header_lines = [84, 64, 93, 22, 22] +footer_lines = [6, 3, 9, 2, 6] # Checks whether the command-line arguments are valid; exists otherwise for f in files: diff --git a/scripts/generator/routine.py b/scripts/generator/routine.py index 60b9fcc5..ecfe6798 100644 --- a/scripts/generator/routine.py +++ b/scripts/generator/routine.py @@ -40,7 +40,7 @@ def OptionToWrapper(x): }[x] # Buffers without 'ld' or 'inc' parameter -NO_LD_INC = ["dot","ap"] +NO_LD_INC = ["dot","nrm2","ap"] # ================================================================================================== @@ -252,7 +252,7 @@ class Routine(): # Retrieves a combination of all the argument names, with Claduc casts def ArgumentsCladuc(self, flavour, indent): - return (self.Options() + self.Sizes() + self.BufferCladuc("dot") + + return (self.Options() + self.Sizes() + self.BufferCladuc("dot") + self.BufferCladuc("nrm2") + self.Scalar("alpha") + list(chain(*[self.BufferCladuc(b) for b in self.BuffersFirst()])) + self.Scalar("beta") + @@ -261,7 +261,7 @@ class Routine(): # Retrieves a combination of all the argument names, 
with CLBlast casts def ArgumentsCast(self, flavour, indent): - return (self.OptionsCast(indent) + self.Sizes() + self.Buffer("dot") + + return (self.OptionsCast(indent) + self.Sizes() + self.Buffer("dot") + self.Buffer("nrm2") + self.ScalarUse("alpha", flavour) + list(chain(*[self.Buffer(b) for b in self.BuffersFirst()])) + self.ScalarUse("beta", flavour) + @@ -270,7 +270,7 @@ class Routine(): # As above, but for the clBLAS wrapper def ArgumentsWrapper(self, flavour): - return (self.Options() + self.Sizes() + self.BufferWrapper("dot") + + return (self.Options() + self.Sizes() + self.BufferWrapper("dot") + self.BufferWrapper("nrm2") + self.ScalarUseWrapper("alpha", flavour) + list(chain(*[self.BufferWrapper(b) for b in self.BuffersFirst()])) + self.ScalarUseWrapper("beta", flavour) + @@ -279,7 +279,7 @@ class Routine(): # Retrieves a combination of all the argument definitions def ArgumentsDef(self, flavour): - return (self.OptionsDef() + self.SizesDef() + self.BufferDef("dot") + + return (self.OptionsDef() + self.SizesDef() + self.BufferDef("dot") + self.BufferDef("nrm2") + self.ScalarDef("alpha", flavour) + list(chain(*[self.BufferDef(b) for b in self.BuffersFirst()])) + self.ScalarDef("beta", flavour) + @@ -288,7 +288,7 @@ class Routine(): # As above, but clBLAS wrapper plain datatypes def ArgumentsDefWrapper(self, flavour): - return (self.OptionsDefWrapper() + self.SizesDef() + self.BufferDef("dot") + + return (self.OptionsDefWrapper() + self.SizesDef() + self.BufferDef("dot") + self.BufferDef("nrm2") + self.ScalarDefPlain("alpha", flavour) + list(chain(*[self.BufferDef(b) for b in self.BuffersFirst()])) + self.ScalarDefPlain("beta", flavour) + @@ -297,7 +297,7 @@ class Routine(): # Retrieves a combination of all the argument types def ArgumentsType(self, flavour): - return (self.OptionsType() + self.SizesType() + self.BufferType("dot") + + return (self.OptionsType() + self.SizesType() + self.BufferType("dot") + self.BufferType("nrm2") + self.ScalarType("alpha", 
flavour) + list(chain(*[self.BufferType(b) for b in self.BuffersFirst()])) + self.ScalarType("beta", flavour) + diff --git a/src/clblast.cc b/src/clblast.cc index e7f2477f..9079355a 100644 --- a/src/clblast.cc +++ b/src/clblast.cc @@ -285,6 +285,23 @@ template StatusCode PUBLIC_API Dotc<double2>(const size_t, const cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); +// Euclidian norm of a vector: SNRM2/DNRM2 +template <typename T> +StatusCode Nrm2(const size_t, + cl_mem, const size_t, + const cl_mem, const size_t, const size_t, + cl_command_queue*, cl_event*) { + return StatusCode::kNotImplemented; +} +template StatusCode PUBLIC_API Nrm2<float>(const size_t, + cl_mem, const size_t, + const cl_mem, const size_t, const size_t, + cl_command_queue*, cl_event*); +template StatusCode PUBLIC_API Nrm2<double>(const size_t, + cl_mem, const size_t, + const cl_mem, const size_t, const size_t, + cl_command_queue*, cl_event*); + // ================================================================================================= // BLAS level-2 (matrix-vector) routines // ================================================================================================= diff --git a/src/clblast_c.cc b/src/clblast_c.cc index 66d16f6d..d735fa35 100644 --- a/src/clblast_c.cc +++ b/src/clblast_c.cc @@ -279,6 +279,28 @@ StatusCode CLBlastZdotc(const size_t n, return static_cast<StatusCode>(status); } +// NRM2 +StatusCode CLBlastSnrm2(const size_t n, + cl_mem nrm2_buffer, const size_t nrm2_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event) { + auto status = clblast::Nrm2<float>(n, + nrm2_buffer, nrm2_offset, + x_buffer, x_offset, x_inc, + queue, event); + return static_cast<StatusCode>(status); +} +StatusCode CLBlastDnrm2(const size_t n, + cl_mem nrm2_buffer, const size_t nrm2_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event) { + auto 
status = clblast::Nrm2<double>(n, + nrm2_buffer, nrm2_offset, + x_buffer, x_offset, x_inc, + queue, event); + return static_cast<StatusCode>(status); +} + // ================================================================================================= // BLAS level-2 (matrix-vector) routines // ================================================================================================= diff --git a/test/correctness/routines/level1/xnrm2.cc b/test/correctness/routines/level1/xnrm2.cc new file mode 100644 index 00000000..8238e868 --- /dev/null +++ b/test/correctness/routines/level1/xnrm2.cc @@ -0,0 +1,26 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// ================================================================================================= + +#include "correctness/testblas.h" +#include "routines/level1/xnrm2.h" + +// Shortcuts to the clblast namespace +using float2 = clblast::float2; +using double2 = clblast::double2; + +// Main function (not within the clblast namespace) +int main(int argc, char *argv[]) { + clblast::RunTests<clblast::TestXnrm2<float>, float, float>(argc, argv, false, "SNRM2"); + clblast::RunTests<clblast::TestXnrm2<double>, double, double>(argc, argv, true, "DNRM2"); + return 0; +} + +// ================================================================================================= diff --git a/test/performance/routines/level1/xnrm2.cc b/test/performance/routines/level1/xnrm2.cc new file mode 100644 index 00000000..d5ae348b --- /dev/null +++ b/test/performance/routines/level1/xnrm2.cc @@ -0,0 +1,33 @@ + +// 
================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// ================================================================================================= + +#include "performance/client.h" +#include "routines/level1/xnrm2.h" + +// Shortcuts to the clblast namespace +using float2 = clblast::float2; +using double2 = clblast::double2; + +// Main function (not within the clblast namespace) +int main(int argc, char *argv[]) { + switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) { + case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); + case clblast::Precision::kSingle: + clblast::RunClient<clblast::TestXnrm2<float>, float, float>(argc, argv); break; + case clblast::Precision::kDouble: + clblast::RunClient<clblast::TestXnrm2<double>, double, double>(argc, argv); break; + case clblast::Precision::kComplexSingle: throw std::runtime_error("Unsupported precision mode"); + case clblast::Precision::kComplexDouble: throw std::runtime_error("Unsupported precision mode"); + } + return 0; +} + +// ================================================================================================= diff --git a/test/wrapper_clblas.h b/test/wrapper_clblas.h index 23a02a45..501f0bc5 100644 --- a/test/wrapper_clblas.h +++ b/test/wrapper_clblas.h @@ -350,6 +350,44 @@ clblasStatus clblasXdotc<double2>(const size_t n, num_queues, queues, num_wait_events, wait_events, events); } +// Forwards the clBLAS calls for SNRM2/DNRM2 +template <typename T> +clblasStatus clblasXnrm2(const size_t n, + cl_mem nrm2_buffer, const size_t nrm2_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t 
x_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events); +template <> +clblasStatus clblasXnrm2<float>(const size_t n, + cl_mem nrm2_buffer, const size_t nrm2_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + auto queue = Queue(queues[0]); + auto context = queue.GetContext(); + auto scratch_buffer = Buffer<float>(context, 2*n); // clBLAS documents a minimum scratch size of 2*n elements for xNRM2 + return clblasSnrm2(n, + nrm2_buffer, nrm2_offset, + x_buffer, x_offset, static_cast<int>(x_inc), + scratch_buffer(), + num_queues, queues, num_wait_events, wait_events, events); +} +template <> +clblasStatus clblasXnrm2<double>(const size_t n, + cl_mem nrm2_buffer, const size_t nrm2_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + auto queue = Queue(queues[0]); + auto context = queue.GetContext(); + auto scratch_buffer = Buffer<double>(context, 2*n); // clBLAS documents a minimum scratch size of 2*n elements for xNRM2 + return clblasDnrm2(n, + nrm2_buffer, nrm2_offset, + x_buffer, x_offset, static_cast<int>(x_inc), + scratch_buffer(), + num_queues, queues, num_wait_events, wait_events, events); +} + // ================================================================================================= // BLAS level-2 (matrix-vector) routines // ================================================================================================= |