summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- include/clblast.h 17
-rw-r--r-- include/clblast_c.h 26
-rw-r--r-- scripts/generator/generator.py 4
-rw-r--r-- scripts/generator/routine.py 49
-rw-r--r-- src/clblast.cc 43
-rw-r--r-- src/clblast_c.cc 56
-rw-r--r-- test/correctness/routines/level1/xrot.cc 26
-rw-r--r-- test/correctness/routines/level1/xrotg.cc 26
-rw-r--r-- test/performance/routines/level1/xrot.cc 33
-rw-r--r-- test/performance/routines/level1/xrotg.cc 33
-rw-r--r-- test/wrapper_clblas.h 65
11 files changed, 357 insertions, 21 deletions
diff --git a/include/clblast.h b/include/clblast.h
index 2d03b096..a5fd30f8 100644
--- a/include/clblast.h
+++ b/include/clblast.h
@@ -87,6 +87,23 @@ enum class Precision { kHalf = 16, kSingle = 32, kDouble = 64,
// BLAS level-1 (vector-vector) routines
// =================================================================================================
+// Generate plane rotation: SROTG/DROTG. SA, SB, C and S are each a cl_mem buffer plus an offset.
+template <typename T>
+StatusCode Rotg(cl_mem SA_buffer, const size_t SA_offset,
+ cl_mem SB_buffer, const size_t SB_offset,
+ cl_mem C_buffer, const size_t C_offset,
+ cl_mem S_buffer, const size_t S_offset,
+ cl_command_queue* queue, cl_event* event = nullptr);
+
+// Apply plane rotation: SROT/DROT. Here C and S are passed by value as type T, not as buffers.
+template <typename T>
+StatusCode Rot(const size_t n,
+ cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ const T C,
+ const T S,
+ cl_command_queue* queue, cl_event* event = nullptr);
+
// Swap two vectors: SSWAP/DSWAP/CSWAP/ZSWAP
template <typename T>
StatusCode Swap(const size_t n,
diff --git a/include/clblast_c.h b/include/clblast_c.h
index c5395e51..be5bab57 100644
--- a/include/clblast_c.h
+++ b/include/clblast_c.h
@@ -96,6 +96,32 @@ typedef enum Precision_ { kHalf = 16, kSingle = 32, kDouble = 64,
// BLAS level-1 (vector-vector) routines
// =================================================================================================
+// Generate plane rotation: SROTG/DROTG. All four operands are cl_mem buffers with an offset.
+StatusCode PUBLIC_API CLBlastSrotg(cl_mem SA_buffer, const size_t SA_offset,
+ cl_mem SB_buffer, const size_t SB_offset,
+ cl_mem C_buffer, const size_t C_offset,
+ cl_mem S_buffer, const size_t S_offset,
+ cl_command_queue* queue, cl_event* event);
+StatusCode PUBLIC_API CLBlastDrotg(cl_mem SA_buffer, const size_t SA_offset,
+ cl_mem SB_buffer, const size_t SB_offset,
+ cl_mem C_buffer, const size_t C_offset,
+ cl_mem S_buffer, const size_t S_offset,
+ cl_command_queue* queue, cl_event* event);
+
+// Apply plane rotation: SROT/DROT. C and S are float/double values, x and y are cl_mem buffers.
+StatusCode PUBLIC_API CLBlastSrot(const size_t n,
+ cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ const float C,
+ const float S,
+ cl_command_queue* queue, cl_event* event);
+StatusCode PUBLIC_API CLBlastDrot(const size_t n,
+ cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ const double C,
+ const double S,
+ cl_command_queue* queue, cl_event* event);
+
// Swap two vectors: SSWAP/DSWAP/CSWAP/ZSWAP
StatusCode PUBLIC_API CLBlastSswap(const size_t n,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
diff --git a/scripts/generator/generator.py b/scripts/generator/generator.py
index 5163b1ca..7191cba1 100644
--- a/scripts/generator/generator.py
+++ b/scripts/generator/generator.py
@@ -52,8 +52,8 @@ TU = DataType("typename T, typename U", "T,U", ["T", "U", "T", "U"], "T") # for
# Populates a list of routines
routines = [
[ # Level 1: vector-vector
- #Routine(False, "1", "rotg", T, [S,D], [], [], [], [], ["a","b","c","s"], False, "Generate plane rotation"),
- #Routine(False, "1", "rot", T, [S,D], ["n"], [], [], ["x","y"], ["c","s"], False, "Apply plane rotation"),
+ Routine(False, "1", "rotg", T, [S,D], [], [], [], ["SA","SB","C","S"], [], False, "Generate plane rotation"),
+ Routine(False, "1", "rot", T, [S,D], ["n"], [], [], ["x","y"], ["C","S"], False, "Apply plane rotation"),
Routine(True, "1", "swap", T, [S,D,C,Z], ["n"], [], [], ["x","y"], [], False, "Swap two vectors"),
Routine(True, "1", "scal", T, [S,D,C,Z], ["n"], [], [], ["x"], ["alpha"], False, "Vector scaling"),
Routine(True, "1", "copy", T, [S,D,C,Z], ["n"], [], ["x"], ["y"], [], False, "Vector copy"),
diff --git a/scripts/generator/routine.py b/scripts/generator/routine.py
index 1086cecc..d74def25 100644
--- a/scripts/generator/routine.py
+++ b/scripts/generator/routine.py
@@ -39,9 +39,6 @@ def OptionToWrapper(x):
'diagonal': "clblasDiag",
}[x]
-# Buffers without 'ld' or 'inc' parameter
-NO_LD_INC = ["dot","ap"]
-
# ==================================================================================================
# Class holding routine-specific information (e.g. name, which arguments, which precisions)
@@ -61,6 +58,14 @@ class Routine():
self.scratch = scratch # Scratch buffer (e.g. for xDOT)
self.description = description
+ # Names of the buffers treated as scalar buffers (the same fixed list for every routine)
+ def ScalarBuffers(self):
+ return ["SA","SB","C","S","dot"]
+
+ # List of buffers without 'inc' or 'ld': all scalar buffers plus the "ap" buffer
+ def BuffersWithoutLdInc(self):
+ return self.ScalarBuffers() + ["ap"]
+
# Retrieves the number of characters in the routine's name
def Length(self):
return len(self.name)
@@ -94,7 +99,7 @@ class Routine():
if (name in self.inputs) or (name in self.outputs):
a = [name+"_buffer"]
b = [name+"_offset"]
- c = [name+"_"+self.Postfix(name)] if (name not in NO_LD_INC) else []
+ c = [name+"_"+self.Postfix(name)] if (name not in self.BuffersWithoutLdInc()) else []
return [", ".join(a+b+c)]
return []
@@ -104,7 +109,7 @@ class Routine():
if (name in self.inputs) or (name in self.outputs):
a = [prefix+"cl_mem "+name+"_buffer"]
b = ["const size_t "+name+"_offset"]
- c = ["const size_t "+name+"_"+self.Postfix(name)] if (name not in NO_LD_INC) else []
+ c = ["const size_t "+name+"_"+self.Postfix(name)] if (name not in self.BuffersWithoutLdInc()) else []
return [", ".join(a+b+c)]
return []
@@ -113,7 +118,7 @@ class Routine():
if (name in self.inputs) or (name in self.outputs):
a = ["Buffer<"+self.template.buffertype+">("+name+"_buffer)"]
b = [name+"_offset"]
- c = [name+"_"+self.Postfix(name)] if (name not in NO_LD_INC) else []
+ c = [name+"_"+self.Postfix(name)] if (name not in self.BuffersWithoutLdInc()) else []
return [", ".join(a+b+c)]
return []
@@ -136,7 +141,7 @@ class Routine():
if (name in self.inputs) or (name in self.outputs):
a = [prefix+"cl_mem"]
b = ["const size_t"]
- c = ["const size_t"] if (name not in NO_LD_INC) else []
+ c = ["const size_t"] if (name not in self.BuffersWithoutLdInc()) else []
return [", ".join(a+b+c)]
return []
@@ -252,57 +257,63 @@ class Routine():
# Retrieves a combination of all the argument names, with Claduc casts
def ArgumentsCladuc(self, flavour, indent):
- return (self.Options() + self.Sizes() + self.BufferCladuc("dot") +
+ return (self.Options() + self.Sizes() +
+ list(chain(*[self.BufferCladuc(b) for b in self.ScalarBuffers()])) +
self.Scalar("alpha") +
list(chain(*[self.BufferCladuc(b) for b in self.BuffersFirst()])) +
self.Scalar("beta") +
list(chain(*[self.BufferCladuc(b) for b in self.BuffersSecond()])) +
- list(chain(*[self.Scalar(s) for s in ["d1","d2","a","b","c","s"]])))
+ list(chain(*[self.Scalar(s) for s in ["C","S"]])))
# Retrieves a combination of all the argument names, with CLBlast casts
def ArgumentsCast(self, flavour, indent):
- return (self.OptionsCast(indent) + self.Sizes() + self.Buffer("dot") +
+ return (self.OptionsCast(indent) + self.Sizes() +
+ list(chain(*[self.Buffer(b) for b in self.ScalarBuffers()])) +
self.ScalarUse("alpha", flavour) +
list(chain(*[self.Buffer(b) for b in self.BuffersFirst()])) +
self.ScalarUse("beta", flavour) +
list(chain(*[self.Buffer(b) for b in self.BuffersSecond()])) +
- list(chain(*[self.ScalarUse(s, flavour) for s in ["d1","d2","a","b","c","s"]])))
+ list(chain(*[self.ScalarUse(s, flavour) for s in ["C","S"]])))
# As above, but for the clBLAS wrapper
def ArgumentsWrapper(self, flavour):
- return (self.Options() + self.Sizes() + self.BufferWrapper("dot") +
+ return (self.Options() + self.Sizes() +
+ list(chain(*[self.BufferWrapper(b) for b in self.ScalarBuffers()])) +
self.ScalarUseWrapper("alpha", flavour) +
list(chain(*[self.BufferWrapper(b) for b in self.BuffersFirst()])) +
self.ScalarUseWrapper("beta", flavour) +
list(chain(*[self.BufferWrapper(b) for b in self.BuffersSecond()])) +
- list(chain(*[self.ScalarUseWrapper(s, flavour) for s in ["d1","d2","a","b","c","s"]])))
+ list(chain(*[self.ScalarUseWrapper(s, flavour) for s in ["C","S"]])))
# Retrieves a combination of all the argument definitions
def ArgumentsDef(self, flavour):
- return (self.OptionsDef() + self.SizesDef() + self.BufferDef("dot") +
+ return (self.OptionsDef() + self.SizesDef() +
+ list(chain(*[self.BufferDef(b) for b in self.ScalarBuffers()])) +
self.ScalarDef("alpha", flavour) +
list(chain(*[self.BufferDef(b) for b in self.BuffersFirst()])) +
self.ScalarDef("beta", flavour) +
list(chain(*[self.BufferDef(b) for b in self.BuffersSecond()])) +
- list(chain(*[self.ScalarDef(s, flavour) for s in ["d1","d2","a","b","c","s"]])))
+ list(chain(*[self.ScalarDef(s, flavour) for s in ["C","S"]])))
# As above, but clBLAS wrapper plain datatypes
def ArgumentsDefWrapper(self, flavour):
- return (self.OptionsDefWrapper() + self.SizesDef() + self.BufferDef("dot") +
+ return (self.OptionsDefWrapper() + self.SizesDef() +
+ list(chain(*[self.BufferDef(b) for b in self.ScalarBuffers()])) +
self.ScalarDefPlain("alpha", flavour) +
list(chain(*[self.BufferDef(b) for b in self.BuffersFirst()])) +
self.ScalarDefPlain("beta", flavour) +
list(chain(*[self.BufferDef(b) for b in self.BuffersSecond()])) +
- list(chain(*[self.ScalarDefPlain(s, flavour) for s in ["d1","d2","a","b","c","s"]])))
+ list(chain(*[self.ScalarDefPlain(s, flavour) for s in ["C","S"]])))
# Retrieves a combination of all the argument types
def ArgumentsType(self, flavour):
- return (self.OptionsType() + self.SizesType() + self.BufferType("dot") +
+ return (self.OptionsType() + self.SizesType() +
+ list(chain(*[self.BufferType(b) for b in self.ScalarBuffers()])) +
self.ScalarType("alpha", flavour) +
list(chain(*[self.BufferType(b) for b in self.BuffersFirst()])) +
self.ScalarType("beta", flavour) +
list(chain(*[self.BufferType(b) for b in self.BuffersSecond()])) +
- list(chain(*[self.ScalarType(s, flavour) for s in ["d1","d2","a","b","c","s"]])))
+ list(chain(*[self.ScalarType(s, flavour) for s in ["C","S"]])))
# ==============================================================================================
diff --git a/src/clblast.cc b/src/clblast.cc
index 0b8de40a..8f7abfd6 100644
--- a/src/clblast.cc
+++ b/src/clblast.cc
@@ -67,6 +67,49 @@ namespace clblast {
// BLAS level-1 (vector-vector) routines
// =================================================================================================
+// Generate plane rotation: SROTG/DROTG (placeholder: always returns StatusCode::kNotImplemented)
+template <typename T>
+StatusCode Rotg(cl_mem, const size_t,
+ cl_mem, const size_t,
+ cl_mem, const size_t,
+ cl_mem, const size_t,
+ cl_command_queue*, cl_event*) {
+ return StatusCode::kNotImplemented;
+}
+template StatusCode PUBLIC_API Rotg<float>(cl_mem, const size_t,
+ cl_mem, const size_t,
+ cl_mem, const size_t,
+ cl_mem, const size_t,
+ cl_command_queue*, cl_event*);
+template StatusCode PUBLIC_API Rotg<double>(cl_mem, const size_t,
+ cl_mem, const size_t,
+ cl_mem, const size_t,
+ cl_mem, const size_t,
+ cl_command_queue*, cl_event*);
+
+// Apply plane rotation: SROT/DROT (placeholder: always returns StatusCode::kNotImplemented)
+template <typename T>
+StatusCode Rot(const size_t,
+ cl_mem, const size_t, const size_t,
+ cl_mem, const size_t, const size_t,
+ const T,
+ const T,
+ cl_command_queue*, cl_event*) {
+ return StatusCode::kNotImplemented;
+}
+template StatusCode PUBLIC_API Rot<float>(const size_t,
+ cl_mem, const size_t, const size_t,
+ cl_mem, const size_t, const size_t,
+ const float,
+ const float,
+ cl_command_queue*, cl_event*);
+template StatusCode PUBLIC_API Rot<double>(const size_t,
+ cl_mem, const size_t, const size_t,
+ cl_mem, const size_t, const size_t,
+ const double,
+ const double,
+ cl_command_queue*, cl_event*);
+
// Swap two vectors: SSWAP/DSWAP/CSWAP/ZSWAP
template <typename T>
StatusCode Swap(const size_t n,
diff --git a/src/clblast_c.cc b/src/clblast_c.cc
index 66d16f6d..d36b2695 100644
--- a/src/clblast_c.cc
+++ b/src/clblast_c.cc
@@ -25,6 +25,62 @@ using double2 = clblast::double2;
// BLAS level-1 (vector-vector) routines
// =================================================================================================
+// ROTG: forwards to clblast::Rotg<float>/Rotg<double> and casts the result to the C StatusCode
+StatusCode CLBlastSrotg(cl_mem SA_buffer, const size_t SA_offset,
+ cl_mem SB_buffer, const size_t SB_offset,
+ cl_mem C_buffer, const size_t C_offset,
+ cl_mem S_buffer, const size_t S_offset,
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Rotg<float>(SA_buffer, SA_offset,
+ SB_buffer, SB_offset,
+ C_buffer, C_offset,
+ S_buffer, S_offset,
+ queue, event);
+ return static_cast<StatusCode>(status);
+}
+StatusCode CLBlastDrotg(cl_mem SA_buffer, const size_t SA_offset,
+ cl_mem SB_buffer, const size_t SB_offset,
+ cl_mem C_buffer, const size_t C_offset,
+ cl_mem S_buffer, const size_t S_offset,
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Rotg<double>(SA_buffer, SA_offset,
+ SB_buffer, SB_offset,
+ C_buffer, C_offset,
+ S_buffer, S_offset,
+ queue, event);
+ return static_cast<StatusCode>(status);
+}
+
+// ROT: forwards to clblast::Rot (T deduced from C and S) and casts the result to the C StatusCode
+StatusCode CLBlastSrot(const size_t n,
+ cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ const float C,
+ const float S,
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Rot(n,
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ C,
+ S,
+ queue, event);
+ return static_cast<StatusCode>(status);
+}
+StatusCode CLBlastDrot(const size_t n,
+ cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ const double C,
+ const double S,
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Rot(n,
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ C,
+ S,
+ queue, event);
+ return static_cast<StatusCode>(status);
+}
+
// SWAP
StatusCode CLBlastSswap(const size_t n,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
diff --git a/test/correctness/routines/level1/xrot.cc b/test/correctness/routines/level1/xrot.cc
new file mode 100644
index 00000000..4020ff13
--- /dev/null
+++ b/test/correctness/routines/level1/xrot.cc
@@ -0,0 +1,26 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// =================================================================================================
+
+#include "correctness/testblas.h"
+#include "routines/level1/xrot.h"
+
+// Shortcuts to the clblast namespace
+using float2 = clblast::float2;
+using double2 = clblast::double2;
+
+// Main function (not within the clblast namespace): runs the SROT (float) and DROT (double) tests
+int main(int argc, char *argv[]) {
+ clblast::RunTests<clblast::TestXrot<float>, float, float>(argc, argv, false, "SROT");
+ clblast::RunTests<clblast::TestXrot<double>, double, double>(argc, argv, true, "DROT");
+ return 0;
+}
+
+// =================================================================================================
diff --git a/test/correctness/routines/level1/xrotg.cc b/test/correctness/routines/level1/xrotg.cc
new file mode 100644
index 00000000..dd068992
--- /dev/null
+++ b/test/correctness/routines/level1/xrotg.cc
@@ -0,0 +1,26 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// =================================================================================================
+
+#include "correctness/testblas.h"
+#include "routines/level1/xrotg.h"
+
+// Shortcuts to the clblast namespace
+using float2 = clblast::float2;
+using double2 = clblast::double2;
+
+// Main function (not within the clblast namespace): runs the SROTG (float) and DROTG (double) tests
+int main(int argc, char *argv[]) {
+ clblast::RunTests<clblast::TestXrotg<float>, float, float>(argc, argv, false, "SROTG");
+ clblast::RunTests<clblast::TestXrotg<double>, double, double>(argc, argv, true, "DROTG");
+ return 0;
+}
+
+// =================================================================================================
diff --git a/test/performance/routines/level1/xrot.cc b/test/performance/routines/level1/xrot.cc
new file mode 100644
index 00000000..3ff59ace
--- /dev/null
+++ b/test/performance/routines/level1/xrot.cc
@@ -0,0 +1,33 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// =================================================================================================
+
+#include "performance/client.h"
+#include "routines/level1/xrot.h"
+
+// Shortcuts to the clblast namespace
+using float2 = clblast::float2;
+using double2 = clblast::double2;
+
+// Main function (not within the clblast namespace): runs float/double, throws for half/complex
+int main(int argc, char *argv[]) {
+ switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
+ case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
+ case clblast::Precision::kSingle:
+ clblast::RunClient<clblast::TestXrot<float>, float, float>(argc, argv); break;
+ case clblast::Precision::kDouble:
+ clblast::RunClient<clblast::TestXrot<double>, double, double>(argc, argv); break;
+ case clblast::Precision::kComplexSingle: throw std::runtime_error("Unsupported precision mode");
+ case clblast::Precision::kComplexDouble: throw std::runtime_error("Unsupported precision mode");
+ }
+ return 0;
+}
+
+// =================================================================================================
diff --git a/test/performance/routines/level1/xrotg.cc b/test/performance/routines/level1/xrotg.cc
new file mode 100644
index 00000000..0320c314
--- /dev/null
+++ b/test/performance/routines/level1/xrotg.cc
@@ -0,0 +1,33 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// =================================================================================================
+
+#include "performance/client.h"
+#include "routines/level1/xrotg.h"
+
+// Shortcuts to the clblast namespace
+using float2 = clblast::float2;
+using double2 = clblast::double2;
+
+// Main function (not within the clblast namespace): runs float/double, throws for half/complex
+int main(int argc, char *argv[]) {
+ switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
+ case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
+ case clblast::Precision::kSingle:
+ clblast::RunClient<clblast::TestXrotg<float>, float, float>(argc, argv); break;
+ case clblast::Precision::kDouble:
+ clblast::RunClient<clblast::TestXrotg<double>, double, double>(argc, argv); break;
+ case clblast::Precision::kComplexSingle: throw std::runtime_error("Unsupported precision mode");
+ case clblast::Precision::kComplexDouble: throw std::runtime_error("Unsupported precision mode");
+ }
+ return 0;
+}
+
+// =================================================================================================
diff --git a/test/wrapper_clblas.h b/test/wrapper_clblas.h
index 23a02a45..553e3e66 100644
--- a/test/wrapper_clblas.h
+++ b/test/wrapper_clblas.h
@@ -25,6 +25,71 @@ namespace clblast {
// BLAS level-1 (vector-vector) routines
// =================================================================================================
+// Forwards the clBLAS calls for SROTG/DROTG (declared generically, defined for float and double)
+template <typename T>
+clblasStatus clblasXrotg(cl_mem SA_buffer, const size_t SA_offset,
+ cl_mem SB_buffer, const size_t SB_offset,
+ cl_mem C_buffer, const size_t C_offset,
+ cl_mem S_buffer, const size_t S_offset,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events);
+template <>
+clblasStatus clblasXrotg<float>(cl_mem SA_buffer, const size_t SA_offset,
+ cl_mem SB_buffer, const size_t SB_offset,
+ cl_mem C_buffer, const size_t C_offset,
+ cl_mem S_buffer, const size_t S_offset,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ return clblasSrotg(SA_buffer, SA_offset,
+ SB_buffer, SB_offset,
+ C_buffer, C_offset,
+ S_buffer, S_offset,
+ num_queues, queues, num_wait_events, wait_events, events);
+}
+template <>
+clblasStatus clblasXrotg<double>(cl_mem SA_buffer, const size_t SA_offset,
+ cl_mem SB_buffer, const size_t SB_offset,
+ cl_mem C_buffer, const size_t C_offset,
+ cl_mem S_buffer, const size_t S_offset,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ return clblasDrotg(SA_buffer, SA_offset,
+ SB_buffer, SB_offset,
+ C_buffer, C_offset,
+ S_buffer, S_offset,
+ num_queues, queues, num_wait_events, wait_events, events);
+}
+
+// Forwards the clBLAS calls for SROT/DROT (float/double overloads; increments are cast to int)
+clblasStatus clblasXrot(const size_t n,
+ cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ const float C,
+ const float S,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ return clblasSrot(n,
+ x_buffer, x_offset, static_cast<int>(x_inc),
+ y_buffer, y_offset, static_cast<int>(y_inc),
+ C,
+ S,
+ num_queues, queues, num_wait_events, wait_events, events);
+}
+clblasStatus clblasXrot(const size_t n,
+ cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ const double C,
+ const double S,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ return clblasDrot(n,
+ x_buffer, x_offset, static_cast<int>(x_inc),
+ y_buffer, y_offset, static_cast<int>(y_inc),
+ C,
+ S,
+ num_queues, queues, num_wait_events, wait_events, events);
+}
+
// Forwards the clBLAS calls for SSWAP/DSWAP/CSWAP/ZSWAP
template <typename T>
clblasStatus clblasXswap(const size_t n,