diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2016-03-30 17:32:13 +0200 |
---|---|---|
committer | cnugteren <web@cedricnugteren.nl> | 2016-03-30 16:13:37 -0700 |
commit | c1df78676471a8a26ea8ec5a092734566d490db4 (patch) | |
tree | 272bd2cda6a54929f83e4d2c6c7a0652384f88ec | |
parent | 6ecc0d089c80296cce3089734771279a30783f81 (diff) |
Added prototypes for the xROTM and xROTMG routines
-rw-r--r-- | include/clblast.h | 33 | ||||
-rw-r--r-- | include/clblast_c.h | 58 | ||||
-rw-r--r-- | scripts/generator/generator.py | 6 | ||||
-rw-r--r-- | scripts/generator/routine.py | 42 | ||||
-rw-r--r-- | src/clblast.cc | 47 | ||||
-rw-r--r-- | src/clblast_c.cc | 106 | ||||
-rw-r--r-- | test/correctness/routines/level1/xrotm.cc | 26 | ||||
-rw-r--r-- | test/correctness/routines/level1/xrotmg.cc | 26 | ||||
-rw-r--r-- | test/performance/routines/level1/xrotm.cc | 33 | ||||
-rw-r--r-- | test/performance/routines/level1/xrotmg.cc | 33 | ||||
-rw-r--r-- | test/wrapper_clblas.h | 131 |
11 files changed, 445 insertions, 96 deletions
diff --git a/include/clblast.h b/include/clblast.h index a5fd30f8..ac16188f 100644 --- a/include/clblast.h +++ b/include/clblast.h @@ -87,23 +87,40 @@ enum class Precision { kHalf = 16, kSingle = 32, kDouble = 64, // BLAS level-1 (vector-vector) routines // ================================================================================================= -// Generate plane rotation: SROTG/DROTG +// Generate givens plane rotation: SROTG/DROTG template <typename T> -StatusCode Rotg(cl_mem SA_buffer, const size_t SA_offset, - cl_mem SB_buffer, const size_t SB_offset, - cl_mem C_buffer, const size_t C_offset, - cl_mem S_buffer, const size_t S_offset, +StatusCode Rotg(cl_mem sa_buffer, const size_t sa_offset, + cl_mem sb_buffer, const size_t sb_offset, + cl_mem sc_buffer, const size_t sc_offset, + cl_mem ss_buffer, const size_t ss_offset, cl_command_queue* queue, cl_event* event = nullptr); -// Apply plane rotation: SROT/DROT +// Generate modified givens plane rotation: SROTMG/DROTMG +template <typename T> +StatusCode Rotmg(cl_mem sd1_buffer, const size_t sd1_offset, + cl_mem sd2_buffer, const size_t sd2_offset, + cl_mem sx1_buffer, const size_t sx1_offset, + cl_mem sy1_buffer, const size_t sy1_offset, + cl_mem sparam_buffer, const size_t sparam_offset, + cl_command_queue* queue, cl_event* event = nullptr); + +// Apply givens plane rotation: SROT/DROT template <typename T> StatusCode Rot(const size_t n, cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, - const T C, - const T S, + const T cos, + const T sin, cl_command_queue* queue, cl_event* event = nullptr); +// Apply modified givens plane rotation: SROTM/DROTM +template <typename T> +StatusCode Rotm(const size_t n, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_mem sparam_buffer, const size_t sparam_offset, + cl_command_queue* queue, cl_event* event = nullptr); + // Swap two vectors: SSWAP/DSWAP/CSWAP/ZSWAP template <typename T> StatusCode Swap(const size_t n, diff --git a/include/clblast_c.h b/include/clblast_c.h index be5bab57..a5563951 100644 --- a/include/clblast_c.h +++ b/include/clblast_c.h @@ -96,32 +96,58 @@ typedef enum Precision_ { kHalf = 16, kSingle = 32, kDouble = 64, // BLAS level-1 (vector-vector) routines // ================================================================================================= -// Generate plane rotation: SROTG/DROTG -StatusCode PUBLIC_API CLBlastSrotg(cl_mem SA_buffer, const size_t SA_offset, - cl_mem SB_buffer, const size_t SB_offset, - cl_mem C_buffer, const size_t C_offset, - cl_mem S_buffer, const size_t S_offset, - cl_command_queue* queue, cl_event* event); -StatusCode PUBLIC_API CLBlastDrotg(cl_mem SA_buffer, const size_t SA_offset, - cl_mem SB_buffer, const size_t SB_offset, - cl_mem C_buffer, const size_t C_offset, - cl_mem S_buffer, const size_t S_offset, - cl_command_queue* queue, cl_event* event); +// Generate givens plane rotation: SROTG/DROTG +StatusCode PUBLIC_API CLBlastSrotg(cl_mem sa_buffer, const size_t sa_offset, + cl_mem sb_buffer, const size_t sb_offset, + cl_mem sc_buffer, const size_t sc_offset, + cl_mem ss_buffer, const size_t ss_offset, + cl_command_queue* queue, cl_event* event); +StatusCode PUBLIC_API CLBlastDrotg(cl_mem sa_buffer, const size_t sa_offset, + cl_mem sb_buffer, const size_t sb_offset, + cl_mem sc_buffer, const size_t sc_offset, + cl_mem ss_buffer, const size_t ss_offset, + cl_command_queue* queue, cl_event* event); + +// Generate modified givens plane rotation: SROTMG/DROTMG +StatusCode PUBLIC_API CLBlastSrotmg(cl_mem sd1_buffer, const size_t sd1_offset, + cl_mem sd2_buffer, const size_t sd2_offset, + cl_mem sx1_buffer, const size_t sx1_offset, + cl_mem sy1_buffer, const size_t sy1_offset, + cl_mem sparam_buffer, const size_t sparam_offset, + cl_command_queue* queue, cl_event* event); +StatusCode PUBLIC_API CLBlastDrotmg(cl_mem sd1_buffer, const size_t sd1_offset, + cl_mem sd2_buffer, const size_t sd2_offset, + cl_mem sx1_buffer, const size_t sx1_offset, + cl_mem sy1_buffer, const size_t sy1_offset, + cl_mem sparam_buffer, const size_t sparam_offset, + cl_command_queue* queue, cl_event* event); -// Apply plane rotation: SROT/DROT +// Apply givens plane rotation: SROT/DROT StatusCode PUBLIC_API CLBlastSrot(const size_t n, cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, - const float C, - const float S, + const float cos, + const float sin, cl_command_queue* queue, cl_event* event); StatusCode PUBLIC_API CLBlastDrot(const size_t n, cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, - const double C, - const double S, + const double cos, + const double sin, cl_command_queue* queue, cl_event* event); +// Apply modified givens plane rotation: SROTM/DROTM +StatusCode PUBLIC_API CLBlastSrotm(const size_t n, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_mem sparam_buffer, const size_t sparam_offset, + cl_command_queue* queue, cl_event* event); +StatusCode PUBLIC_API CLBlastDrotm(const size_t n, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_mem sparam_buffer, const size_t sparam_offset, + cl_command_queue* queue, cl_event* event); + // Swap two vectors: SSWAP/DSWAP/CSWAP/ZSWAP StatusCode PUBLIC_API CLBlastSswap(const size_t n, cl_mem x_buffer, const size_t x_offset, const size_t x_inc, diff --git a/scripts/generator/generator.py b/scripts/generator/generator.py index 7191cba1..1eada753 100644 --- a/scripts/generator/generator.py +++ b/scripts/generator/generator.py @@ -52,8 +52,10 @@ TU = DataType("typename T, typename U", "T,U", ["T", "U", "T", "U"], "T") # for # Populates a list of routines routines = [ [ # Level 1: vector-vector - Routine(False, "1", "rotg", T, [S,D], [], [], [], ["SA","SB","C","S"], [], False, "Generate plane rotation"), - Routine(False, "1", "rot", T, [S,D], ["n"], [], [], ["x","y"], ["C","S"], False, "Apply plane rotation"), + Routine(False, "1", "rotg", T, [S,D], [], [], [], ["sa","sb","sc","ss"], [], False, "Generate givens plane rotation"), + Routine(False, "1", "rotmg", T, [S,D], [], [], [], ["sd1","sd2","sx1","sy1","sparam"], [], False, "Generate modified givens plane rotation"), + Routine(False, "1", "rot", T, [S,D], ["n"], [], [], ["x","y"], ["cos","sin"], False, "Apply givens plane rotation"), + Routine(False, "1", "rotm", T, [S,D], ["n"], [], [], ["x","y","sparam"], [], False, "Apply modified givens plane rotation"), Routine(True, "1", "swap", T, [S,D,C,Z], ["n"], [], [], ["x","y"], [], False, "Swap two vectors"), Routine(True, "1", "scal", T, [S,D,C,Z], ["n"], [], [], ["x"], ["alpha"], False, "Vector scaling"), Routine(True, "1", "copy", T, [S,D,C,Z], ["n"], [], ["x"], ["y"], [], False, "Vector copy"), diff --git a/scripts/generator/routine.py b/scripts/generator/routine.py index d74def25..0a61490b 100644 --- a/scripts/generator/routine.py +++ b/scripts/generator/routine.py @@ -59,12 +59,18 @@ class Routine(): self.description = description # List of scalar buffers - def ScalarBuffers(self): - return ["SA","SB","C","S","dot"] + def ScalarBuffersFirst(self): + return ["dot"] + def ScalarBuffersSecond(self): + return ["sa","sb","sc","ss","sd1","sd2","sx1","sy1","sparam"] + + # List of scalars other than alpha and beta + def OtherScalars(self): + return ["cos","sin"] # List of buffers without 'inc' or 'ld' def BuffersWithoutLdInc(self): - return self.ScalarBuffers() + ["ap"] + return self.ScalarBuffersFirst() + self.ScalarBuffersSecond() + ["ap"] # Retrieves the number of characters in the routine's name def Length(self): @@ -258,62 +264,68 @@ class Routine(): # Retrieves a combination of all the argument names, with Claduc casts def ArgumentsCladuc(self, flavour, indent): return (self.Options() + self.Sizes() + - list(chain(*[self.BufferCladuc(b) for b in self.ScalarBuffers()])) + + list(chain(*[self.BufferCladuc(b) for b in self.ScalarBuffersFirst()])) + self.Scalar("alpha") + list(chain(*[self.BufferCladuc(b) for b in self.BuffersFirst()])) + self.Scalar("beta") + list(chain(*[self.BufferCladuc(b) for b in self.BuffersSecond()])) + - list(chain(*[self.Scalar(s) for s in ["C","S"]]))) + list(chain(*[self.BufferCladuc(b) for b in self.ScalarBuffersSecond()])) + + list(chain(*[self.Scalar(s) for s in self.OtherScalars()]))) # Retrieves a combination of all the argument names, with CLBlast casts def ArgumentsCast(self, flavour, indent): return (self.OptionsCast(indent) + self.Sizes() + - list(chain(*[self.Buffer(b) for b in self.ScalarBuffers()])) + + list(chain(*[self.Buffer(b) for b in self.ScalarBuffersFirst()])) + self.ScalarUse("alpha", flavour) + list(chain(*[self.Buffer(b) for b in self.BuffersFirst()])) + self.ScalarUse("beta", flavour) + list(chain(*[self.Buffer(b) for b in self.BuffersSecond()])) + - list(chain(*[self.ScalarUse(s, flavour) for s in ["C","S"]]))) + list(chain(*[self.Buffer(b) for b in self.ScalarBuffersSecond()])) + + list(chain(*[self.ScalarUse(s, flavour) for s in self.OtherScalars()]))) # As above, but for the clBLAS wrapper def ArgumentsWrapper(self, flavour): return (self.Options() + self.Sizes() + - list(chain(*[self.BufferWrapper(b) for b in self.ScalarBuffers()])) + + list(chain(*[self.BufferWrapper(b) for b in self.ScalarBuffersFirst()])) + self.ScalarUseWrapper("alpha", flavour) + list(chain(*[self.BufferWrapper(b) for b in self.BuffersFirst()])) + self.ScalarUseWrapper("beta", flavour) + list(chain(*[self.BufferWrapper(b) for b in self.BuffersSecond()])) + - list(chain(*[self.ScalarUseWrapper(s, flavour) for s in ["C","S"]]))) + list(chain(*[self.BufferWrapper(b) for b in self.ScalarBuffersSecond()])) + + list(chain(*[self.ScalarUseWrapper(s, flavour) for s in self.OtherScalars()]))) # Retrieves a combination of all the argument definitions def ArgumentsDef(self, flavour): return (self.OptionsDef() + self.SizesDef() + - list(chain(*[self.BufferDef(b) for b in self.ScalarBuffers()])) + + list(chain(*[self.BufferDef(b) for b in self.ScalarBuffersFirst()])) + self.ScalarDef("alpha", flavour) + list(chain(*[self.BufferDef(b) for b in self.BuffersFirst()])) + self.ScalarDef("beta", flavour) + list(chain(*[self.BufferDef(b) for b in self.BuffersSecond()])) + - list(chain(*[self.ScalarDef(s, flavour) for s in ["C","S"]]))) + list(chain(*[self.BufferDef(b) for b in self.ScalarBuffersSecond()])) + + list(chain(*[self.ScalarDef(s, flavour) for s in self.OtherScalars()]))) # As above, but clBLAS wrapper plain datatypes def ArgumentsDefWrapper(self, flavour): return (self.OptionsDefWrapper() + self.SizesDef() + - list(chain(*[self.BufferDef(b) for b in self.ScalarBuffers()])) + + list(chain(*[self.BufferDef(b) for b in self.ScalarBuffersFirst()])) + self.ScalarDefPlain("alpha", flavour) + list(chain(*[self.BufferDef(b) for b in self.BuffersFirst()])) + self.ScalarDefPlain("beta", flavour) + list(chain(*[self.BufferDef(b) for b in self.BuffersSecond()])) + - list(chain(*[self.ScalarDefPlain(s, flavour) for s in ["C","S"]]))) + list(chain(*[self.BufferDef(b) for b in self.ScalarBuffersSecond()])) + + list(chain(*[self.ScalarDefPlain(s, flavour) for s in self.OtherScalars()]))) # Retrieves a combination of all the argument types def ArgumentsType(self, flavour): return (self.OptionsType() + self.SizesType() + - list(chain(*[self.BufferType(b) for b in self.ScalarBuffers()])) + + list(chain(*[self.BufferType(b) for b in self.ScalarBuffersFirst()])) + self.ScalarType("alpha", flavour) + list(chain(*[self.BufferType(b) for b in self.BuffersFirst()])) + self.ScalarType("beta", flavour) + list(chain(*[self.BufferType(b) for b in self.BuffersSecond()])) + - list(chain(*[self.ScalarType(s, flavour) for s in ["C","S"]]))) + list(chain(*[self.BufferType(b) for b in self.ScalarBuffersSecond()])) + + list(chain(*[self.ScalarType(s, flavour) for s in self.OtherScalars()]))) # ============================================================================================== diff --git a/src/clblast.cc b/src/clblast.cc index 8f7abfd6..1b2c3a12 100644 --- a/src/clblast.cc +++ b/src/clblast.cc @@ -67,7 +67,7 @@ namespace clblast { // BLAS level-1 (vector-vector) routines // ================================================================================================= -// Generate plane rotation: SROTG/DROTG +// Generate givens plane rotation: SROTG/DROTG template <typename T> StatusCode Rotg(cl_mem, const size_t, cl_mem, const size_t, @@ -87,7 +87,30 @@ template StatusCode PUBLIC_API Rotg<double>(cl_mem, const size_t, cl_mem, const size_t, cl_command_queue*, cl_event*); -// Apply plane rotation: SROT/DROT +// Generate modified givens plane rotation: SROTMG/DROTMG +template <typename T> +StatusCode Rotmg(cl_mem, const size_t, + cl_mem, const size_t, + cl_mem, const size_t, + cl_mem, const size_t, + cl_mem, const size_t, + cl_command_queue*, cl_event*) { + return StatusCode::kNotImplemented; +} +template StatusCode PUBLIC_API Rotmg<float>(cl_mem, const size_t, + cl_mem, const size_t, + cl_mem, const size_t, + cl_mem, const size_t, + cl_mem, const size_t, + cl_command_queue*, cl_event*); +template StatusCode PUBLIC_API Rotmg<double>(cl_mem, const size_t, + cl_mem, const size_t, + cl_mem, const size_t, + cl_mem, const size_t, + cl_mem, const size_t, + cl_command_queue*, cl_event*); + +// Apply givens plane rotation: SROT/DROT template <typename T> StatusCode Rot(const size_t, cl_mem, const size_t, const size_t, @@ -110,6 +133,26 @@ template StatusCode PUBLIC_API Rot<double>(const size_t, const double, cl_command_queue*, cl_event*); +// Apply modified givens plane rotation: SROTM/DROTM +template <typename T> +StatusCode Rotm(const size_t, + cl_mem, const size_t, const size_t, + cl_mem, const size_t, const size_t, + cl_mem, const size_t, + cl_command_queue*, cl_event*) { + return StatusCode::kNotImplemented; +} +template StatusCode PUBLIC_API Rotm<float>(const size_t, + cl_mem, const size_t, const size_t, + cl_mem, const size_t, const size_t, + cl_mem, const size_t, + cl_command_queue*, cl_event*); +template StatusCode PUBLIC_API Rotm<double>(const size_t, + cl_mem, const size_t, const size_t, + cl_mem, const size_t, const size_t, + cl_mem, const size_t, + cl_command_queue*, cl_event*); + // Swap two vectors: SSWAP/DSWAP/CSWAP/ZSWAP template <typename T> StatusCode Swap(const size_t n, diff --git a/src/clblast_c.cc b/src/clblast_c.cc index d36b2695..b530732c 100644 --- a/src/clblast_c.cc +++ b/src/clblast_c.cc @@ -26,61 +26,117 @@ using double2 = clblast::double2; // ================================================================================================= // ROTG -StatusCode CLBlastSrotg(cl_mem SA_buffer, const size_t SA_offset, - cl_mem SB_buffer, const size_t SB_offset, - cl_mem C_buffer, const size_t C_offset, - cl_mem S_buffer, const size_t S_offset, - cl_command_queue* queue, cl_event* event) { - auto status = clblast::Rotg<float>(SA_buffer, SA_offset, - SB_buffer, SB_offset, - C_buffer, C_offset, - S_buffer, S_offset, +StatusCode CLBlastSrotg(cl_mem sa_buffer, const size_t sa_offset, + cl_mem sb_buffer, const size_t sb_offset, + cl_mem sc_buffer, const size_t sc_offset, + cl_mem ss_buffer, const size_t ss_offset, + cl_command_queue* queue, cl_event* event) { + auto status = clblast::Rotg<float>(sa_buffer, sa_offset, + sb_buffer, sb_offset, + sc_buffer, sc_offset, + ss_buffer, ss_offset, queue, event); return static_cast<StatusCode>(status); } -StatusCode CLBlastDrotg(cl_mem SA_buffer, const size_t SA_offset, - cl_mem SB_buffer, const size_t SB_offset, - cl_mem C_buffer, const size_t C_offset, - cl_mem S_buffer, const size_t S_offset, +StatusCode CLBlastDrotg(cl_mem sa_buffer, const size_t sa_offset, + cl_mem sb_buffer, const size_t sb_offset, + cl_mem sc_buffer, const size_t sc_offset, + cl_mem ss_buffer, const size_t ss_offset, cl_command_queue* queue, cl_event* event) { - auto status = clblast::Rotg<double>(SA_buffer, SA_offset, - SB_buffer, SB_offset, - C_buffer, C_offset, - S_buffer, S_offset, + auto status = clblast::Rotg<double>(sa_buffer, sa_offset, + sb_buffer, sb_offset, + sc_buffer, sc_offset, + ss_buffer, ss_offset, queue, event); return static_cast<StatusCode>(status); } +// ROTMG +StatusCode CLBlastSrotmg(cl_mem sd1_buffer, const size_t sd1_offset, + cl_mem sd2_buffer, const size_t sd2_offset, + cl_mem sx1_buffer, const size_t sx1_offset, + cl_mem sy1_buffer, const size_t sy1_offset, + cl_mem sparam_buffer, const size_t sparam_offset, + cl_command_queue* queue, cl_event* event) { + auto status = clblast::Rotmg<float>(sd1_buffer, sd1_offset, + sd2_buffer, sd2_offset, + sx1_buffer, sx1_offset, + sy1_buffer, sy1_offset, + sparam_buffer, sparam_offset, + queue, event); + return static_cast<StatusCode>(status); +} +StatusCode CLBlastDrotmg(cl_mem sd1_buffer, const size_t sd1_offset, + cl_mem sd2_buffer, const size_t sd2_offset, + cl_mem sx1_buffer, const size_t sx1_offset, + cl_mem sy1_buffer, const size_t sy1_offset, + cl_mem sparam_buffer, const size_t sparam_offset, + cl_command_queue* queue, cl_event* event) { + auto status = clblast::Rotmg<double>(sd1_buffer, sd1_offset, + sd2_buffer, sd2_offset, + sx1_buffer, sx1_offset, + sy1_buffer, sy1_offset, + sparam_buffer, sparam_offset, + queue, event); + return static_cast<StatusCode>(status); +} + // ROT StatusCode CLBlastSrot(const size_t n, cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, - const float C, - const float S, + const float cos, + const float sin, cl_command_queue* queue, cl_event* event) { auto status = clblast::Rot(n, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, - C, - S, + cos, + sin, queue, event); return static_cast<StatusCode>(status); } StatusCode CLBlastDrot(const size_t n, cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, - const double C, - const double S, + const double cos, + const double sin, cl_command_queue* queue, cl_event* event) { auto status = clblast::Rot(n, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, - C, - S, + cos, + sin, queue, event); return static_cast<StatusCode>(status); } +// ROTM +StatusCode CLBlastSrotm(const size_t n, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_mem sparam_buffer, const size_t sparam_offset, + cl_command_queue* queue, cl_event* event) { + auto status = clblast::Rotm<float>(n, + x_buffer, x_offset, x_inc, + y_buffer, y_offset, y_inc, + sparam_buffer, sparam_offset, + queue, event); + return static_cast<StatusCode>(status); +} +StatusCode CLBlastDrotm(const size_t n, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_mem sparam_buffer, const size_t sparam_offset, + cl_command_queue* queue, cl_event* event) { + auto status = clblast::Rotm<double>(n, + x_buffer, x_offset, x_inc, + y_buffer, y_offset, y_inc, + sparam_buffer, sparam_offset, + queue, event); + return static_cast<StatusCode>(status); +} + // SWAP StatusCode CLBlastSswap(const size_t n, cl_mem x_buffer, const size_t x_offset, const size_t x_inc, diff --git a/test/correctness/routines/level1/xrotm.cc b/test/correctness/routines/level1/xrotm.cc new file mode 100644 index 00000000..869056ef --- /dev/null +++ b/test/correctness/routines/level1/xrotm.cc @@ -0,0 +1,26 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// ================================================================================================= + +#include "correctness/testblas.h" +#include "routines/level1/xrotm.h" + +// Shortcuts to the clblast namespace +using float2 = clblast::float2; +using double2 = clblast::double2; + +// Main function (not within the clblast namespace) +int main(int argc, char *argv[]) { + clblast::RunTests<clblast::TestXrotm<float>, float, float>(argc, argv, false, "SROTM"); + clblast::RunTests<clblast::TestXrotm<double>, double, double>(argc, argv, true, "DROTM"); + return 0; +} + +// ================================================================================================= diff --git a/test/correctness/routines/level1/xrotmg.cc b/test/correctness/routines/level1/xrotmg.cc new file mode 100644 index 00000000..29f8b0e1 --- /dev/null +++ b/test/correctness/routines/level1/xrotmg.cc @@ -0,0 +1,26 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// ================================================================================================= + +#include "correctness/testblas.h" +#include "routines/level1/xrotmg.h" + +// Shortcuts to the clblast namespace +using float2 = clblast::float2; +using double2 = clblast::double2; + +// Main function (not within the clblast namespace) +int main(int argc, char *argv[]) { + clblast::RunTests<clblast::TestXrotmg<float>, float, float>(argc, argv, false, "SROTMG"); + clblast::RunTests<clblast::TestXrotmg<double>, double, double>(argc, argv, true, "DROTMG"); + return 0; +} + +// ================================================================================================= diff --git a/test/performance/routines/level1/xrotm.cc b/test/performance/routines/level1/xrotm.cc new file mode 100644 index 00000000..7af94d0f --- /dev/null +++ b/test/performance/routines/level1/xrotm.cc @@ -0,0 +1,33 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// ================================================================================================= + +#include "performance/client.h" +#include "routines/level1/xrotm.h" + +// Shortcuts to the clblast namespace +using float2 = clblast::float2; +using double2 = clblast::double2; + +// Main function (not within the clblast namespace) +int main(int argc, char *argv[]) { + switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) { + case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); + case clblast::Precision::kSingle: + clblast::RunClient<clblast::TestXrotm<float>, float, float>(argc, argv); break; + case clblast::Precision::kDouble: + clblast::RunClient<clblast::TestXrotm<double>, double, double>(argc, argv); break; + case clblast::Precision::kComplexSingle: throw std::runtime_error("Unsupported precision mode"); + case clblast::Precision::kComplexDouble: throw std::runtime_error("Unsupported precision mode"); + } + return 0; +} + +// ================================================================================================= diff --git a/test/performance/routines/level1/xrotmg.cc b/test/performance/routines/level1/xrotmg.cc new file mode 100644 index 00000000..a326347b --- /dev/null +++ b/test/performance/routines/level1/xrotmg.cc @@ -0,0 +1,33 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// ================================================================================================= + +#include "performance/client.h" +#include "routines/level1/xrotmg.h" + +// Shortcuts to the clblast namespace +using float2 = clblast::float2; +using double2 = clblast::double2; + +// Main function (not within the clblast namespace) +int main(int argc, char *argv[]) { + switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) { + case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); + case clblast::Precision::kSingle: + clblast::RunClient<clblast::TestXrotmg<float>, float, float>(argc, argv); break; + case clblast::Precision::kDouble: + clblast::RunClient<clblast::TestXrotmg<double>, double, double>(argc, argv); break; + case clblast::Precision::kComplexSingle: throw std::runtime_error("Unsupported precision mode"); + case clblast::Precision::kComplexDouble: throw std::runtime_error("Unsupported precision mode"); + } + return 0; +} + +// ================================================================================================= diff --git a/test/wrapper_clblas.h b/test/wrapper_clblas.h index 553e3e66..259aa27c 100644 --- a/test/wrapper_clblas.h +++ b/test/wrapper_clblas.h @@ -27,69 +27,144 @@ namespace clblast { // Forwards the clBLAS calls for SROTG/DROTG template <typename T> -clblasStatus clblasXrotg(cl_mem SA_buffer, const size_t SA_offset, - cl_mem SB_buffer, const size_t SB_offset, - cl_mem C_buffer, const size_t C_offset, - cl_mem S_buffer, const size_t S_offset, +clblasStatus clblasXrotg(cl_mem sa_buffer, const size_t sa_offset, + cl_mem sb_buffer, const size_t sb_offset, + cl_mem sc_buffer, const size_t sc_offset, + cl_mem ss_buffer, const size_t ss_offset, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events); template <> -clblasStatus clblasXrotg<float>(cl_mem SA_buffer, const size_t SA_offset, - cl_mem SB_buffer, const size_t SB_offset, - cl_mem C_buffer, const size_t C_offset, - cl_mem S_buffer, const size_t S_offset, +clblasStatus clblasXrotg<float>(cl_mem sa_buffer, const size_t sa_offset, + cl_mem sb_buffer, const size_t sb_offset, + cl_mem sc_buffer, const size_t sc_offset, + cl_mem ss_buffer, const size_t ss_offset, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { - return clblasSrotg(SA_buffer, SA_offset, - SB_buffer, SB_offset, - C_buffer, C_offset, - S_buffer, S_offset, + return clblasSrotg(sa_buffer, sa_offset, + sb_buffer, sb_offset, + sc_buffer, sc_offset, + ss_buffer, ss_offset, num_queues, queues, num_wait_events, wait_events, events); } template <> -clblasStatus clblasXrotg<double>(cl_mem SA_buffer, const size_t SA_offset, - cl_mem SB_buffer, const size_t SB_offset, - cl_mem C_buffer, const size_t C_offset, - cl_mem S_buffer, const size_t S_offset, +clblasStatus clblasXrotg<double>(cl_mem sa_buffer, const size_t sa_offset, + cl_mem sb_buffer, const size_t sb_offset, + cl_mem sc_buffer, const size_t sc_offset, + cl_mem ss_buffer, const size_t ss_offset, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { - return clblasDrotg(SA_buffer, SA_offset, - SB_buffer, SB_offset, - C_buffer, C_offset, - S_buffer, S_offset, + return clblasDrotg(sa_buffer, sa_offset, + sb_buffer, sb_offset, + sc_buffer, sc_offset, + ss_buffer, ss_offset, num_queues, queues, num_wait_events, wait_events, events); } +// Forwards the clBLAS calls for SROTMG/DROTMG +template <typename T> +clblasStatus clblasXrotmg(cl_mem sd1_buffer, const size_t sd1_offset, + cl_mem sd2_buffer, const size_t sd2_offset, + cl_mem sx1_buffer, const size_t sx1_offset, + cl_mem sy1_buffer, const size_t sy1_offset, + cl_mem sparam_buffer, const size_t sparam_offset, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events); +template <> +clblasStatus clblasXrotmg<float>(cl_mem sd1_buffer, const size_t sd1_offset, + cl_mem sd2_buffer, const size_t sd2_offset, + cl_mem sx1_buffer, const size_t sx1_offset, + cl_mem sy1_buffer, const size_t sy1_offset, + cl_mem sparam_buffer, const size_t sparam_offset, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasSrotmg(sd1_buffer, sd1_offset, + sd2_buffer, sd2_offset, + sx1_buffer, sx1_offset, + sy1_buffer, sy1_offset, + sparam_buffer, sparam_offset, + num_queues, queues, num_wait_events, wait_events, events); +} +template <> +clblasStatus clblasXrotmg<double>(cl_mem sd1_buffer, const size_t sd1_offset, + cl_mem sd2_buffer, const size_t sd2_offset, + cl_mem sx1_buffer, const size_t sx1_offset, + cl_mem sy1_buffer, const size_t sy1_offset, + cl_mem sparam_buffer, const size_t sparam_offset, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasDrotmg(sd1_buffer, sd1_offset, + sd2_buffer, sd2_offset, + sx1_buffer, sx1_offset, + sy1_buffer, sy1_offset, + sparam_buffer, sparam_offset, + num_queues, queues, num_wait_events, wait_events, events); +} + // Forwards the clBLAS calls for SROT/DROT clblasStatus clblasXrot(const size_t n, cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, - const float C, - const float S, + const float cos, + const float sin, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasSrot(n, x_buffer, x_offset, static_cast<int>(x_inc), y_buffer, y_offset, static_cast<int>(y_inc), - C, - S, + cos, + sin, num_queues, queues, num_wait_events, wait_events, events); } clblasStatus clblasXrot(const size_t n, cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, - const double C, - const double S, + const double cos, + const double sin, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasDrot(n, x_buffer, x_offset, static_cast<int>(x_inc), y_buffer, y_offset, static_cast<int>(y_inc), - C, - S, + cos, + sin, num_queues, queues, num_wait_events, wait_events, events); } +// Forwards the clBLAS calls for SROTM/DROTM +template <typename T> +clblasStatus clblasXrotm(const size_t n, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_mem sparam_buffer, const size_t sparam_offset, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events); +template <> +clblasStatus clblasXrotm<float>(const size_t n, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_mem sparam_buffer, const size_t sparam_offset, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasSrotm(n, + x_buffer, x_offset, static_cast<int>(x_inc), + y_buffer, y_offset, static_cast<int>(y_inc), + sparam_buffer, sparam_offset, + num_queues, queues, num_wait_events, wait_events, events); +} +template <> +clblasStatus clblasXrotm<double>(const size_t n, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_mem sparam_buffer, const size_t sparam_offset, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasDrotm(n, + x_buffer, x_offset, static_cast<int>(x_inc), + y_buffer, y_offset, static_cast<int>(y_inc), + sparam_buffer, sparam_offset, + num_queues, queues, num_wait_events, wait_events, events); +} + // Forwards the clBLAS calls for SSWAP/DSWAP/CSWAP/ZSWAP template <typename T> clblasStatus clblasXswap(const size_t n, |