diff options
-rw-r--r-- | include/clblast.h | 17 | ||||
-rw-r--r-- | include/clblast_c.h | 26 | ||||
-rw-r--r-- | scripts/generator/generator.py | 4 | ||||
-rw-r--r-- | scripts/generator/routine.py | 49 | ||||
-rw-r--r-- | src/clblast.cc | 43 | ||||
-rw-r--r-- | src/clblast_c.cc | 56 | ||||
-rw-r--r-- | test/correctness/routines/level1/xrot.cc | 26 | ||||
-rw-r--r-- | test/correctness/routines/level1/xrotg.cc | 26 | ||||
-rw-r--r-- | test/performance/routines/level1/xrot.cc | 33 | ||||
-rw-r--r-- | test/performance/routines/level1/xrotg.cc | 33 | ||||
-rw-r--r-- | test/wrapper_clblas.h | 65 |
11 files changed, 357 insertions, 21 deletions
diff --git a/include/clblast.h b/include/clblast.h index 2d03b096..a5fd30f8 100644 --- a/include/clblast.h +++ b/include/clblast.h @@ -87,6 +87,23 @@ enum class Precision { kHalf = 16, kSingle = 32, kDouble = 64, // BLAS level-1 (vector-vector) routines // ================================================================================================= +// Generate plane rotation: SROTG/DROTG +template <typename T> +StatusCode Rotg(cl_mem SA_buffer, const size_t SA_offset, + cl_mem SB_buffer, const size_t SB_offset, + cl_mem C_buffer, const size_t C_offset, + cl_mem S_buffer, const size_t S_offset, + cl_command_queue* queue, cl_event* event = nullptr); + +// Apply plane rotation: SROT/DROT +template <typename T> +StatusCode Rot(const size_t n, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + const T C, + const T S, + cl_command_queue* queue, cl_event* event = nullptr); + // Swap two vectors: SSWAP/DSWAP/CSWAP/ZSWAP template <typename T> StatusCode Swap(const size_t n, diff --git a/include/clblast_c.h b/include/clblast_c.h index c5395e51..be5bab57 100644 --- a/include/clblast_c.h +++ b/include/clblast_c.h @@ -96,6 +96,32 @@ typedef enum Precision_ { kHalf = 16, kSingle = 32, kDouble = 64, // BLAS level-1 (vector-vector) routines // ================================================================================================= +// Generate plane rotation: SROTG/DROTG +StatusCode PUBLIC_API CLBlastSrotg(cl_mem SA_buffer, const size_t SA_offset, + cl_mem SB_buffer, const size_t SB_offset, + cl_mem C_buffer, const size_t C_offset, + cl_mem S_buffer, const size_t S_offset, + cl_command_queue* queue, cl_event* event); +StatusCode PUBLIC_API CLBlastDrotg(cl_mem SA_buffer, const size_t SA_offset, + cl_mem SB_buffer, const size_t SB_offset, + cl_mem C_buffer, const size_t C_offset, + cl_mem S_buffer, const size_t S_offset, + cl_command_queue* queue, cl_event* event); + +// Apply plane rotation: SROT/DROT +StatusCode PUBLIC_API CLBlastSrot(const size_t n, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + const float C, + const float S, + cl_command_queue* queue, cl_event* event); +StatusCode PUBLIC_API CLBlastDrot(const size_t n, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + const double C, + const double S, + cl_command_queue* queue, cl_event* event); + // Swap two vectors: SSWAP/DSWAP/CSWAP/ZSWAP StatusCode PUBLIC_API CLBlastSswap(const size_t n, cl_mem x_buffer, const size_t x_offset, const size_t x_inc, diff --git a/scripts/generator/generator.py b/scripts/generator/generator.py index 5163b1ca..7191cba1 100644 --- a/scripts/generator/generator.py +++ b/scripts/generator/generator.py @@ -52,8 +52,8 @@ TU = DataType("typename T, typename U", "T,U", ["T", "U", "T", "U"], "T") # for # Populates a list of routines routines = [ [ # Level 1: vector-vector - #Routine(False, "1", "rotg", T, [S,D], [], [], [], [], ["a","b","c","s"], False, "Generate plane rotation"), - #Routine(False, "1", "rot", T, [S,D], ["n"], [], [], ["x","y"], ["c","s"], False, "Apply plane rotation"), + Routine(False, "1", "rotg", T, [S,D], [], [], [], ["SA","SB","C","S"], [], False, "Generate plane rotation"), + Routine(False, "1", "rot", T, [S,D], ["n"], [], [], ["x","y"], ["C","S"], False, "Apply plane rotation"), Routine(True, "1", "swap", T, [S,D,C,Z], ["n"], [], [], ["x","y"], [], False, "Swap two vectors"), Routine(True, "1", "scal", T, [S,D,C,Z], ["n"], [], [], ["x"], ["alpha"], False, "Vector scaling"), Routine(True, "1", "copy", T, [S,D,C,Z], ["n"], [], ["x"], ["y"], [], False, "Vector copy"), diff --git a/scripts/generator/routine.py b/scripts/generator/routine.py index 1086cecc..d74def25 100644 --- a/scripts/generator/routine.py +++ b/scripts/generator/routine.py @@ -39,9 +39,6 @@ def OptionToWrapper(x): 'diagonal': "clblasDiag", }[x] -# Buffers without 'ld' or 'inc' parameter -NO_LD_INC = ["dot","ap"] - # ================================================================================================== # Class holding routine-specific information (e.g. name, which arguments, which precisions) @@ -61,6 +58,14 @@ class Routine(): self.scratch = scratch # Scratch buffer (e.g. for xDOT) self.description = description + # List of scalar buffers + def ScalarBuffers(self): + return ["SA","SB","C","S","dot"] + + # List of buffers without 'inc' or 'ld' + def BuffersWithoutLdInc(self): + return self.ScalarBuffers() + ["ap"] + # Retrieves the number of characters in the routine's name def Length(self): return len(self.name) @@ -94,7 +99,7 @@ class Routine(): if (name in self.inputs) or (name in self.outputs): a = [name+"_buffer"] b = [name+"_offset"] - c = [name+"_"+self.Postfix(name)] if (name not in NO_LD_INC) else [] + c = [name+"_"+self.Postfix(name)] if (name not in self.BuffersWithoutLdInc()) else [] return [", ".join(a+b+c)] return [] @@ -104,7 +109,7 @@ class Routine(): if (name in self.inputs) or (name in self.outputs): a = [prefix+"cl_mem "+name+"_buffer"] b = ["const size_t "+name+"_offset"] - c = ["const size_t "+name+"_"+self.Postfix(name)] if (name not in NO_LD_INC) else [] + c = ["const size_t "+name+"_"+self.Postfix(name)] if (name not in self.BuffersWithoutLdInc()) else [] return [", ".join(a+b+c)] return [] @@ -113,7 +118,7 @@ class Routine(): if (name in self.inputs) or (name in self.outputs): a = ["Buffer<"+self.template.buffertype+">("+name+"_buffer)"] b = [name+"_offset"] - c = [name+"_"+self.Postfix(name)] if (name not in NO_LD_INC) else [] + c = [name+"_"+self.Postfix(name)] if (name not in self.BuffersWithoutLdInc()) else [] return [", ".join(a+b+c)] return [] @@ -136,7 +141,7 @@ class Routine(): if (name in self.inputs) or (name in self.outputs): a = [prefix+"cl_mem"] b = ["const size_t"] - c = ["const size_t"] if (name not in NO_LD_INC) else [] + c = ["const size_t"] if (name not in self.BuffersWithoutLdInc()) else [] return [", ".join(a+b+c)] return [] @@ -252,57 +257,63 @@ class Routine(): # Retrieves a combination of all the argument names, with Claduc casts def ArgumentsCladuc(self, flavour, indent): - return (self.Options() + self.Sizes() + self.BufferCladuc("dot") + + return (self.Options() + self.Sizes() + + list(chain(*[self.BufferCladuc(b) for b in self.ScalarBuffers()])) + self.Scalar("alpha") + list(chain(*[self.BufferCladuc(b) for b in self.BuffersFirst()])) + self.Scalar("beta") + list(chain(*[self.BufferCladuc(b) for b in self.BuffersSecond()])) + - list(chain(*[self.Scalar(s) for s in ["d1","d2","a","b","c","s"]]))) + list(chain(*[self.Scalar(s) for s in ["C","S"]]))) # Retrieves a combination of all the argument names, with CLBlast casts def ArgumentsCast(self, flavour, indent): - return (self.OptionsCast(indent) + self.Sizes() + self.Buffer("dot") + + return (self.OptionsCast(indent) + self.Sizes() + + list(chain(*[self.Buffer(b) for b in self.ScalarBuffers()])) + self.ScalarUse("alpha", flavour) + list(chain(*[self.Buffer(b) for b in self.BuffersFirst()])) + self.ScalarUse("beta", flavour) + list(chain(*[self.Buffer(b) for b in self.BuffersSecond()])) + - list(chain(*[self.ScalarUse(s, flavour) for s in ["d1","d2","a","b","c","s"]]))) + list(chain(*[self.ScalarUse(s, flavour) for s in ["C","S"]]))) # As above, but for the clBLAS wrapper def ArgumentsWrapper(self, flavour): - return (self.Options() + self.Sizes() + self.BufferWrapper("dot") + + return (self.Options() + self.Sizes() + + list(chain(*[self.BufferWrapper(b) for b in self.ScalarBuffers()])) + self.ScalarUseWrapper("alpha", flavour) + list(chain(*[self.BufferWrapper(b) for b in self.BuffersFirst()])) + self.ScalarUseWrapper("beta", flavour) + list(chain(*[self.BufferWrapper(b) for b in self.BuffersSecond()])) + - list(chain(*[self.ScalarUseWrapper(s, flavour) for s in ["d1","d2","a","b","c","s"]]))) + list(chain(*[self.ScalarUseWrapper(s, flavour) for s in ["C","S"]]))) # Retrieves a combination of all the argument definitions def ArgumentsDef(self, flavour): - return (self.OptionsDef() + self.SizesDef() + self.BufferDef("dot") + + return (self.OptionsDef() + self.SizesDef() + + list(chain(*[self.BufferDef(b) for b in self.ScalarBuffers()])) + self.ScalarDef("alpha", flavour) + list(chain(*[self.BufferDef(b) for b in self.BuffersFirst()])) + self.ScalarDef("beta", flavour) + list(chain(*[self.BufferDef(b) for b in self.BuffersSecond()])) + - list(chain(*[self.ScalarDef(s, flavour) for s in ["d1","d2","a","b","c","s"]]))) + list(chain(*[self.ScalarDef(s, flavour) for s in ["C","S"]]))) # As above, but clBLAS wrapper plain datatypes def ArgumentsDefWrapper(self, flavour): - return (self.OptionsDefWrapper() + self.SizesDef() + self.BufferDef("dot") + + return (self.OptionsDefWrapper() + self.SizesDef() + + list(chain(*[self.BufferDef(b) for b in self.ScalarBuffers()])) + self.ScalarDefPlain("alpha", flavour) + list(chain(*[self.BufferDef(b) for b in self.BuffersFirst()])) + self.ScalarDefPlain("beta", flavour) + list(chain(*[self.BufferDef(b) for b in self.BuffersSecond()])) + - list(chain(*[self.ScalarDefPlain(s, flavour) for s in ["d1","d2","a","b","c","s"]]))) + list(chain(*[self.ScalarDefPlain(s, flavour) for s in ["C","S"]]))) # Retrieves a combination of all the argument types def ArgumentsType(self, flavour): - return (self.OptionsType() + self.SizesType() + self.BufferType("dot") + + return (self.OptionsType() + self.SizesType() + + list(chain(*[self.BufferType(b) for b in self.ScalarBuffers()])) + self.ScalarType("alpha", flavour) + list(chain(*[self.BufferType(b) for b in self.BuffersFirst()])) + self.ScalarType("beta", flavour) + list(chain(*[self.BufferType(b) for b in self.BuffersSecond()])) + - list(chain(*[self.ScalarType(s, flavour) for s in ["d1","d2","a","b","c","s"]]))) + list(chain(*[self.ScalarType(s, flavour) for s in ["C","S"]]))) # ============================================================================================== diff --git a/src/clblast.cc b/src/clblast.cc index 0b8de40a..8f7abfd6 100644 --- a/src/clblast.cc +++ b/src/clblast.cc @@ -67,6 +67,49 @@ namespace clblast { // BLAS level-1 (vector-vector) routines // ================================================================================================= +// Generate plane rotation: SROTG/DROTG +template <typename T> +StatusCode Rotg(cl_mem, const size_t, + cl_mem, const size_t, + cl_mem, const size_t, + cl_mem, const size_t, + cl_command_queue*, cl_event*) { + return StatusCode::kNotImplemented; +} +template StatusCode PUBLIC_API Rotg<float>(cl_mem, const size_t, + cl_mem, const size_t, + cl_mem, const size_t, + cl_mem, const size_t, + cl_command_queue*, cl_event*); +template StatusCode PUBLIC_API Rotg<double>(cl_mem, const size_t, + cl_mem, const size_t, + cl_mem, const size_t, + cl_mem, const size_t, + cl_command_queue*, cl_event*); + +// Apply plane rotation: SROT/DROT +template <typename T> +StatusCode Rot(const size_t, + cl_mem, const size_t, const size_t, + cl_mem, const size_t, const size_t, + const T, + const T, + cl_command_queue*, cl_event*) { + return StatusCode::kNotImplemented; +} +template StatusCode PUBLIC_API Rot<float>(const size_t, + cl_mem, const size_t, const size_t, + cl_mem, const size_t, const size_t, + const float, + const float, + cl_command_queue*, cl_event*); +template StatusCode PUBLIC_API Rot<double>(const size_t, + cl_mem, const size_t, const size_t, + cl_mem, const size_t, const size_t, + const double, + const double, + cl_command_queue*, cl_event*); + // Swap two vectors: SSWAP/DSWAP/CSWAP/ZSWAP template <typename T> StatusCode Swap(const size_t n, diff --git a/src/clblast_c.cc b/src/clblast_c.cc index 66d16f6d..d36b2695 100644 --- a/src/clblast_c.cc +++ b/src/clblast_c.cc @@ -25,6 +25,62 @@ using double2 = clblast::double2; // BLAS level-1 (vector-vector) routines // ================================================================================================= +// ROTG +StatusCode CLBlastSrotg(cl_mem SA_buffer, const size_t SA_offset, + cl_mem SB_buffer, const size_t SB_offset, + cl_mem C_buffer, const size_t C_offset, + cl_mem S_buffer, const size_t S_offset, + cl_command_queue* queue, cl_event* event) { + auto status = clblast::Rotg<float>(SA_buffer, SA_offset, + SB_buffer, SB_offset, + C_buffer, C_offset, + S_buffer, S_offset, + queue, event); + return static_cast<StatusCode>(status); +} +StatusCode CLBlastDrotg(cl_mem SA_buffer, const size_t SA_offset, + cl_mem SB_buffer, const size_t SB_offset, + cl_mem C_buffer, const size_t C_offset, + cl_mem S_buffer, const size_t S_offset, + cl_command_queue* queue, cl_event* event) { + auto status = clblast::Rotg<double>(SA_buffer, SA_offset, + SB_buffer, SB_offset, + C_buffer, C_offset, + S_buffer, S_offset, + queue, event); + return static_cast<StatusCode>(status); +} + +// ROT +StatusCode CLBlastSrot(const size_t n, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + const float C, + const float S, + cl_command_queue* queue, cl_event* event) { + auto status = clblast::Rot(n, + x_buffer, x_offset, x_inc, + y_buffer, y_offset, y_inc, + C, + S, + queue, event); + return static_cast<StatusCode>(status); +} +StatusCode CLBlastDrot(const size_t n, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + const double C, + const double S, + cl_command_queue* queue, cl_event* event) { + auto status = clblast::Rot(n, + x_buffer, x_offset, x_inc, + y_buffer, y_offset, y_inc, + C, + S, + queue, event); + return static_cast<StatusCode>(status); +} + // SWAP StatusCode CLBlastSswap(const size_t n, cl_mem x_buffer, const size_t x_offset, const size_t x_inc, diff --git a/test/correctness/routines/level1/xrot.cc b/test/correctness/routines/level1/xrot.cc new file mode 100644 index 00000000..4020ff13 --- /dev/null +++ b/test/correctness/routines/level1/xrot.cc @@ -0,0 +1,26 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// ================================================================================================= + +#include "correctness/testblas.h" +#include "routines/level1/xrot.h" + +// Shortcuts to the clblast namespace +using float2 = clblast::float2; +using double2 = clblast::double2; + +// Main function (not within the clblast namespace) +int main(int argc, char *argv[]) { + clblast::RunTests<clblast::TestXrot<float>, float, float>(argc, argv, false, "SROT"); + clblast::RunTests<clblast::TestXrot<double>, double, double>(argc, argv, true, "DROT"); + return 0; +} + +// ================================================================================================= diff --git a/test/correctness/routines/level1/xrotg.cc b/test/correctness/routines/level1/xrotg.cc new file mode 100644 index 00000000..dd068992 --- /dev/null +++ b/test/correctness/routines/level1/xrotg.cc @@ -0,0 +1,26 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// ================================================================================================= + +#include "correctness/testblas.h" +#include "routines/level1/xrotg.h" + +// Shortcuts to the clblast namespace +using float2 = clblast::float2; +using double2 = clblast::double2; + +// Main function (not within the clblast namespace) +int main(int argc, char *argv[]) { + clblast::RunTests<clblast::TestXrotg<float>, float, float>(argc, argv, false, "SROTG"); + clblast::RunTests<clblast::TestXrotg<double>, double, double>(argc, argv, true, "DROTG"); + return 0; +} + +// ================================================================================================= diff --git a/test/performance/routines/level1/xrot.cc b/test/performance/routines/level1/xrot.cc new file mode 100644 index 00000000..3ff59ace --- /dev/null +++ b/test/performance/routines/level1/xrot.cc @@ -0,0 +1,33 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// ================================================================================================= + +#include "performance/client.h" +#include "routines/level1/xrot.h" + +// Shortcuts to the clblast namespace +using float2 = clblast::float2; +using double2 = clblast::double2; + +// Main function (not within the clblast namespace) +int main(int argc, char *argv[]) { + switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) { + case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); + case clblast::Precision::kSingle: + clblast::RunClient<clblast::TestXrot<float>, float, float>(argc, argv); break; + case clblast::Precision::kDouble: + clblast::RunClient<clblast::TestXrot<double>, double, double>(argc, argv); break; + case clblast::Precision::kComplexSingle: throw std::runtime_error("Unsupported precision mode"); + case clblast::Precision::kComplexDouble: throw std::runtime_error("Unsupported precision mode"); + } + return 0; +} + +// ================================================================================================= diff --git a/test/performance/routines/level1/xrotg.cc b/test/performance/routines/level1/xrotg.cc new file mode 100644 index 00000000..0320c314 --- /dev/null +++ b/test/performance/routines/level1/xrotg.cc @@ -0,0 +1,33 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// ================================================================================================= + +#include "performance/client.h" +#include "routines/level1/xrotg.h" + +// Shortcuts to the clblast namespace +using float2 = clblast::float2; +using double2 = clblast::double2; + +// Main function (not within the clblast namespace) +int main(int argc, char *argv[]) { + switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) { + case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); + case clblast::Precision::kSingle: + clblast::RunClient<clblast::TestXrotg<float>, float, float>(argc, argv); break; + case clblast::Precision::kDouble: + clblast::RunClient<clblast::TestXrotg<double>, double, double>(argc, argv); break; + case clblast::Precision::kComplexSingle: throw std::runtime_error("Unsupported precision mode"); + case clblast::Precision::kComplexDouble: throw std::runtime_error("Unsupported precision mode"); + } + return 0; +} + +// ================================================================================================= diff --git a/test/wrapper_clblas.h b/test/wrapper_clblas.h index 23a02a45..553e3e66 100644 --- a/test/wrapper_clblas.h +++ b/test/wrapper_clblas.h @@ -25,6 +25,71 @@ namespace clblast { // BLAS level-1 (vector-vector) routines // ================================================================================================= +// Forwards the clBLAS calls for SROTG/DROTG +template <typename T> +clblasStatus clblasXrotg(cl_mem SA_buffer, const size_t SA_offset, + cl_mem SB_buffer, const size_t SB_offset, + cl_mem C_buffer, const size_t C_offset, + cl_mem S_buffer, const size_t S_offset, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events); +template <> +clblasStatus clblasXrotg<float>(cl_mem SA_buffer, const size_t SA_offset, + cl_mem SB_buffer, const size_t SB_offset, + cl_mem C_buffer, const size_t C_offset, + cl_mem S_buffer, const size_t S_offset, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasSrotg(SA_buffer, SA_offset, + SB_buffer, SB_offset, + C_buffer, C_offset, + S_buffer, S_offset, + num_queues, queues, num_wait_events, wait_events, events); +} +template <> +clblasStatus clblasXrotg<double>(cl_mem SA_buffer, const size_t SA_offset, + cl_mem SB_buffer, const size_t SB_offset, + cl_mem C_buffer, const size_t C_offset, + cl_mem S_buffer, const size_t S_offset, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasDrotg(SA_buffer, SA_offset, + SB_buffer, SB_offset, + C_buffer, C_offset, + S_buffer, S_offset, + num_queues, queues, num_wait_events, wait_events, events); +} + +// Forwards the clBLAS calls for SROT/DROT +clblasStatus clblasXrot(const size_t n, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + const float C, + const float S, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasSrot(n, + x_buffer, x_offset, static_cast<int>(x_inc), + y_buffer, y_offset, static_cast<int>(y_inc), + C, + S, + num_queues, queues, num_wait_events, wait_events, events); +} +clblasStatus clblasXrot(const size_t n, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + const double C, + const double S, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasDrot(n, + x_buffer, x_offset, static_cast<int>(x_inc), + y_buffer, y_offset, static_cast<int>(y_inc), + C, + S, + num_queues, queues, num_wait_events, wait_events, events); +} + // Forwards the clBLAS calls for SSWAP/DSWAP/CSWAP/ZSWAP template <typename T> clblasStatus clblasXswap(const size_t n, |