summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Cedric Nugteren <web@cedricnugteren.nl>, 2016-03-30 17:32:13 +0200
committer: cnugteren <web@cedricnugteren.nl>, 2016-03-30 16:13:37 -0700
commit: c1df78676471a8a26ea8ec5a092734566d490db4 (patch)
tree: 272bd2cda6a54929f83e4d2c6c7a0652384f88ec
parent: 6ecc0d089c80296cce3089734771279a30783f81 (diff)
Added prototypes for the xROTM and xROTMG routines
-rw-r--r-- include/clblast.h | 33
-rw-r--r-- include/clblast_c.h | 58
-rw-r--r-- scripts/generator/generator.py | 6
-rw-r--r-- scripts/generator/routine.py | 42
-rw-r--r-- src/clblast.cc | 47
-rw-r--r-- src/clblast_c.cc | 106
-rw-r--r-- test/correctness/routines/level1/xrotm.cc | 26
-rw-r--r-- test/correctness/routines/level1/xrotmg.cc | 26
-rw-r--r-- test/performance/routines/level1/xrotm.cc | 33
-rw-r--r-- test/performance/routines/level1/xrotmg.cc | 33
-rw-r--r-- test/wrapper_clblas.h | 131
11 files changed, 445 insertions, 96 deletions
diff --git a/include/clblast.h b/include/clblast.h
index a5fd30f8..ac16188f 100644
--- a/include/clblast.h
+++ b/include/clblast.h
@@ -87,23 +87,40 @@ enum class Precision { kHalf = 16, kSingle = 32, kDouble = 64,
// BLAS level-1 (vector-vector) routines
// =================================================================================================
-// Generate plane rotation: SROTG/DROTG
+// Generate givens plane rotation: SROTG/DROTG
template <typename T>
-StatusCode Rotg(cl_mem SA_buffer, const size_t SA_offset,
- cl_mem SB_buffer, const size_t SB_offset,
- cl_mem C_buffer, const size_t C_offset,
- cl_mem S_buffer, const size_t S_offset,
+StatusCode Rotg(cl_mem sa_buffer, const size_t sa_offset,
+ cl_mem sb_buffer, const size_t sb_offset,
+ cl_mem sc_buffer, const size_t sc_offset,
+ cl_mem ss_buffer, const size_t ss_offset,
cl_command_queue* queue, cl_event* event = nullptr);
-// Apply plane rotation: SROT/DROT
+// Generate modified givens plane rotation: SROTMG/DROTMG
+template <typename T>
+StatusCode Rotmg(cl_mem sd1_buffer, const size_t sd1_offset,
+ cl_mem sd2_buffer, const size_t sd2_offset,
+ cl_mem sx1_buffer, const size_t sx1_offset,
+ cl_mem sy1_buffer, const size_t sy1_offset,
+ cl_mem sparam_buffer, const size_t sparam_offset,
+ cl_command_queue* queue, cl_event* event = nullptr);
+
+// Apply givens plane rotation: SROT/DROT
template <typename T>
StatusCode Rot(const size_t n,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
- const T C,
- const T S,
+ const T cos,
+ const T sin,
cl_command_queue* queue, cl_event* event = nullptr);
+// Apply modified givens plane rotation: SROTM/DROTM
+template <typename T>
+StatusCode Rotm(const size_t n,
+ cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_mem sparam_buffer, const size_t sparam_offset,
+ cl_command_queue* queue, cl_event* event = nullptr);
+
// Swap two vectors: SSWAP/DSWAP/CSWAP/ZSWAP
template <typename T>
StatusCode Swap(const size_t n,
diff --git a/include/clblast_c.h b/include/clblast_c.h
index be5bab57..a5563951 100644
--- a/include/clblast_c.h
+++ b/include/clblast_c.h
@@ -96,32 +96,58 @@ typedef enum Precision_ { kHalf = 16, kSingle = 32, kDouble = 64,
// BLAS level-1 (vector-vector) routines
// =================================================================================================
-// Generate plane rotation: SROTG/DROTG
-StatusCode PUBLIC_API CLBlastSrotg(cl_mem SA_buffer, const size_t SA_offset,
- cl_mem SB_buffer, const size_t SB_offset,
- cl_mem C_buffer, const size_t C_offset,
- cl_mem S_buffer, const size_t S_offset,
- cl_command_queue* queue, cl_event* event);
-StatusCode PUBLIC_API CLBlastDrotg(cl_mem SA_buffer, const size_t SA_offset,
- cl_mem SB_buffer, const size_t SB_offset,
- cl_mem C_buffer, const size_t C_offset,
- cl_mem S_buffer, const size_t S_offset,
- cl_command_queue* queue, cl_event* event);
+// Generate givens plane rotation: SROTG/DROTG
+StatusCode PUBLIC_API CLBlastSrotg(cl_mem sa_buffer, const size_t sa_offset,
+ cl_mem sb_buffer, const size_t sb_offset,
+ cl_mem sc_buffer, const size_t sc_offset,
+ cl_mem ss_buffer, const size_t ss_offset,
+ cl_command_queue* queue, cl_event* event);
+StatusCode PUBLIC_API CLBlastDrotg(cl_mem sa_buffer, const size_t sa_offset,
+ cl_mem sb_buffer, const size_t sb_offset,
+ cl_mem sc_buffer, const size_t sc_offset,
+ cl_mem ss_buffer, const size_t ss_offset,
+ cl_command_queue* queue, cl_event* event);
+
+// Generate modified givens plane rotation: SROTMG/DROTMG
+StatusCode PUBLIC_API CLBlastSrotmg(cl_mem sd1_buffer, const size_t sd1_offset,
+ cl_mem sd2_buffer, const size_t sd2_offset,
+ cl_mem sx1_buffer, const size_t sx1_offset,
+ cl_mem sy1_buffer, const size_t sy1_offset,
+ cl_mem sparam_buffer, const size_t sparam_offset,
+ cl_command_queue* queue, cl_event* event);
+StatusCode PUBLIC_API CLBlastDrotmg(cl_mem sd1_buffer, const size_t sd1_offset,
+ cl_mem sd2_buffer, const size_t sd2_offset,
+ cl_mem sx1_buffer, const size_t sx1_offset,
+ cl_mem sy1_buffer, const size_t sy1_offset,
+ cl_mem sparam_buffer, const size_t sparam_offset,
+ cl_command_queue* queue, cl_event* event);
-// Apply plane rotation: SROT/DROT
+// Apply givens plane rotation: SROT/DROT
StatusCode PUBLIC_API CLBlastSrot(const size_t n,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
- const float C,
- const float S,
+ const float cos,
+ const float sin,
cl_command_queue* queue, cl_event* event);
StatusCode PUBLIC_API CLBlastDrot(const size_t n,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
- const double C,
- const double S,
+ const double cos,
+ const double sin,
cl_command_queue* queue, cl_event* event);
+// Apply modified givens plane rotation: SROTM/DROTM
+StatusCode PUBLIC_API CLBlastSrotm(const size_t n,
+ cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_mem sparam_buffer, const size_t sparam_offset,
+ cl_command_queue* queue, cl_event* event);
+StatusCode PUBLIC_API CLBlastDrotm(const size_t n,
+ cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_mem sparam_buffer, const size_t sparam_offset,
+ cl_command_queue* queue, cl_event* event);
+
// Swap two vectors: SSWAP/DSWAP/CSWAP/ZSWAP
StatusCode PUBLIC_API CLBlastSswap(const size_t n,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
diff --git a/scripts/generator/generator.py b/scripts/generator/generator.py
index 7191cba1..1eada753 100644
--- a/scripts/generator/generator.py
+++ b/scripts/generator/generator.py
@@ -52,8 +52,10 @@ TU = DataType("typename T, typename U", "T,U", ["T", "U", "T", "U"], "T") # for
# Populates a list of routines
routines = [
[ # Level 1: vector-vector
- Routine(False, "1", "rotg", T, [S,D], [], [], [], ["SA","SB","C","S"], [], False, "Generate plane rotation"),
- Routine(False, "1", "rot", T, [S,D], ["n"], [], [], ["x","y"], ["C","S"], False, "Apply plane rotation"),
+ Routine(False, "1", "rotg", T, [S,D], [], [], [], ["sa","sb","sc","ss"], [], False, "Generate givens plane rotation"),
+ Routine(False, "1", "rotmg", T, [S,D], [], [], [], ["sd1","sd2","sx1","sy1","sparam"], [], False, "Generate modified givens plane rotation"),
+ Routine(False, "1", "rot", T, [S,D], ["n"], [], [], ["x","y"], ["cos","sin"], False, "Apply givens plane rotation"),
+ Routine(False, "1", "rotm", T, [S,D], ["n"], [], [], ["x","y","sparam"], [], False, "Apply modified givens plane rotation"),
Routine(True, "1", "swap", T, [S,D,C,Z], ["n"], [], [], ["x","y"], [], False, "Swap two vectors"),
Routine(True, "1", "scal", T, [S,D,C,Z], ["n"], [], [], ["x"], ["alpha"], False, "Vector scaling"),
Routine(True, "1", "copy", T, [S,D,C,Z], ["n"], [], ["x"], ["y"], [], False, "Vector copy"),
diff --git a/scripts/generator/routine.py b/scripts/generator/routine.py
index d74def25..0a61490b 100644
--- a/scripts/generator/routine.py
+++ b/scripts/generator/routine.py
@@ -59,12 +59,18 @@ class Routine():
self.description = description
# List of scalar buffers
- def ScalarBuffers(self):
- return ["SA","SB","C","S","dot"]
+ def ScalarBuffersFirst(self):
+ return ["dot"]
+ def ScalarBuffersSecond(self):
+ return ["sa","sb","sc","ss","sd1","sd2","sx1","sy1","sparam"]
+
+ # List of scalars other than alpha and beta
+ def OtherScalars(self):
+ return ["cos","sin"]
# List of buffers without 'inc' or 'ld'
def BuffersWithoutLdInc(self):
- return self.ScalarBuffers() + ["ap"]
+ return self.ScalarBuffersFirst() + self.ScalarBuffersSecond() + ["ap"]
# Retrieves the number of characters in the routine's name
def Length(self):
@@ -258,62 +264,68 @@ class Routine():
# Retrieves a combination of all the argument names, with Claduc casts
def ArgumentsCladuc(self, flavour, indent):
return (self.Options() + self.Sizes() +
- list(chain(*[self.BufferCladuc(b) for b in self.ScalarBuffers()])) +
+ list(chain(*[self.BufferCladuc(b) for b in self.ScalarBuffersFirst()])) +
self.Scalar("alpha") +
list(chain(*[self.BufferCladuc(b) for b in self.BuffersFirst()])) +
self.Scalar("beta") +
list(chain(*[self.BufferCladuc(b) for b in self.BuffersSecond()])) +
- list(chain(*[self.Scalar(s) for s in ["C","S"]])))
+ list(chain(*[self.BufferCladuc(b) for b in self.ScalarBuffersSecond()])) +
+ list(chain(*[self.Scalar(s) for s in self.OtherScalars()])))
# Retrieves a combination of all the argument names, with CLBlast casts
def ArgumentsCast(self, flavour, indent):
return (self.OptionsCast(indent) + self.Sizes() +
- list(chain(*[self.Buffer(b) for b in self.ScalarBuffers()])) +
+ list(chain(*[self.Buffer(b) for b in self.ScalarBuffersFirst()])) +
self.ScalarUse("alpha", flavour) +
list(chain(*[self.Buffer(b) for b in self.BuffersFirst()])) +
self.ScalarUse("beta", flavour) +
list(chain(*[self.Buffer(b) for b in self.BuffersSecond()])) +
- list(chain(*[self.ScalarUse(s, flavour) for s in ["C","S"]])))
+ list(chain(*[self.Buffer(b) for b in self.ScalarBuffersSecond()])) +
+ list(chain(*[self.ScalarUse(s, flavour) for s in self.OtherScalars()])))
# As above, but for the clBLAS wrapper
def ArgumentsWrapper(self, flavour):
return (self.Options() + self.Sizes() +
- list(chain(*[self.BufferWrapper(b) for b in self.ScalarBuffers()])) +
+ list(chain(*[self.BufferWrapper(b) for b in self.ScalarBuffersFirst()])) +
self.ScalarUseWrapper("alpha", flavour) +
list(chain(*[self.BufferWrapper(b) for b in self.BuffersFirst()])) +
self.ScalarUseWrapper("beta", flavour) +
list(chain(*[self.BufferWrapper(b) for b in self.BuffersSecond()])) +
- list(chain(*[self.ScalarUseWrapper(s, flavour) for s in ["C","S"]])))
+ list(chain(*[self.BufferWrapper(b) for b in self.ScalarBuffersSecond()])) +
+ list(chain(*[self.ScalarUseWrapper(s, flavour) for s in self.OtherScalars()])))
# Retrieves a combination of all the argument definitions
def ArgumentsDef(self, flavour):
return (self.OptionsDef() + self.SizesDef() +
- list(chain(*[self.BufferDef(b) for b in self.ScalarBuffers()])) +
+ list(chain(*[self.BufferDef(b) for b in self.ScalarBuffersFirst()])) +
self.ScalarDef("alpha", flavour) +
list(chain(*[self.BufferDef(b) for b in self.BuffersFirst()])) +
self.ScalarDef("beta", flavour) +
list(chain(*[self.BufferDef(b) for b in self.BuffersSecond()])) +
- list(chain(*[self.ScalarDef(s, flavour) for s in ["C","S"]])))
+ list(chain(*[self.BufferDef(b) for b in self.ScalarBuffersSecond()])) +
+ list(chain(*[self.ScalarDef(s, flavour) for s in self.OtherScalars()])))
# As above, but clBLAS wrapper plain datatypes
def ArgumentsDefWrapper(self, flavour):
return (self.OptionsDefWrapper() + self.SizesDef() +
- list(chain(*[self.BufferDef(b) for b in self.ScalarBuffers()])) +
+ list(chain(*[self.BufferDef(b) for b in self.ScalarBuffersFirst()])) +
self.ScalarDefPlain("alpha", flavour) +
list(chain(*[self.BufferDef(b) for b in self.BuffersFirst()])) +
self.ScalarDefPlain("beta", flavour) +
list(chain(*[self.BufferDef(b) for b in self.BuffersSecond()])) +
- list(chain(*[self.ScalarDefPlain(s, flavour) for s in ["C","S"]])))
+ list(chain(*[self.BufferDef(b) for b in self.ScalarBuffersSecond()])) +
+ list(chain(*[self.ScalarDefPlain(s, flavour) for s in self.OtherScalars()])))
# Retrieves a combination of all the argument types
def ArgumentsType(self, flavour):
return (self.OptionsType() + self.SizesType() +
- list(chain(*[self.BufferType(b) for b in self.ScalarBuffers()])) +
+ list(chain(*[self.BufferType(b) for b in self.ScalarBuffersFirst()])) +
self.ScalarType("alpha", flavour) +
list(chain(*[self.BufferType(b) for b in self.BuffersFirst()])) +
self.ScalarType("beta", flavour) +
list(chain(*[self.BufferType(b) for b in self.BuffersSecond()])) +
- list(chain(*[self.ScalarType(s, flavour) for s in ["C","S"]])))
+ list(chain(*[self.BufferType(b) for b in self.ScalarBuffersSecond()])) +
+ list(chain(*[self.ScalarType(s, flavour) for s in self.OtherScalars()])))
# ==============================================================================================
diff --git a/src/clblast.cc b/src/clblast.cc
index 8f7abfd6..1b2c3a12 100644
--- a/src/clblast.cc
+++ b/src/clblast.cc
@@ -67,7 +67,7 @@ namespace clblast {
// BLAS level-1 (vector-vector) routines
// =================================================================================================
-// Generate plane rotation: SROTG/DROTG
+// Generate givens plane rotation: SROTG/DROTG
template <typename T>
StatusCode Rotg(cl_mem, const size_t,
cl_mem, const size_t,
@@ -87,7 +87,30 @@ template StatusCode PUBLIC_API Rotg<double>(cl_mem, const size_t,
cl_mem, const size_t,
cl_command_queue*, cl_event*);
-// Apply plane rotation: SROT/DROT
+// Generate modified givens plane rotation: SROTMG/DROTMG
+template <typename T>
+StatusCode Rotmg(cl_mem, const size_t,
+ cl_mem, const size_t,
+ cl_mem, const size_t,
+ cl_mem, const size_t,
+ cl_mem, const size_t,
+ cl_command_queue*, cl_event*) {
+ return StatusCode::kNotImplemented;
+}
+template StatusCode PUBLIC_API Rotmg<float>(cl_mem, const size_t,
+ cl_mem, const size_t,
+ cl_mem, const size_t,
+ cl_mem, const size_t,
+ cl_mem, const size_t,
+ cl_command_queue*, cl_event*);
+template StatusCode PUBLIC_API Rotmg<double>(cl_mem, const size_t,
+ cl_mem, const size_t,
+ cl_mem, const size_t,
+ cl_mem, const size_t,
+ cl_mem, const size_t,
+ cl_command_queue*, cl_event*);
+
+// Apply givens plane rotation: SROT/DROT
template <typename T>
StatusCode Rot(const size_t,
cl_mem, const size_t, const size_t,
@@ -110,6 +133,26 @@ template StatusCode PUBLIC_API Rot<double>(const size_t,
const double,
cl_command_queue*, cl_event*);
+// Apply modified givens plane rotation: SROTM/DROTM
+template <typename T>
+StatusCode Rotm(const size_t,
+ cl_mem, const size_t, const size_t,
+ cl_mem, const size_t, const size_t,
+ cl_mem, const size_t,
+ cl_command_queue*, cl_event*) {
+ return StatusCode::kNotImplemented;
+}
+template StatusCode PUBLIC_API Rotm<float>(const size_t,
+ cl_mem, const size_t, const size_t,
+ cl_mem, const size_t, const size_t,
+ cl_mem, const size_t,
+ cl_command_queue*, cl_event*);
+template StatusCode PUBLIC_API Rotm<double>(const size_t,
+ cl_mem, const size_t, const size_t,
+ cl_mem, const size_t, const size_t,
+ cl_mem, const size_t,
+ cl_command_queue*, cl_event*);
+
// Swap two vectors: SSWAP/DSWAP/CSWAP/ZSWAP
template <typename T>
StatusCode Swap(const size_t n,
diff --git a/src/clblast_c.cc b/src/clblast_c.cc
index d36b2695..b530732c 100644
--- a/src/clblast_c.cc
+++ b/src/clblast_c.cc
@@ -26,61 +26,117 @@ using double2 = clblast::double2;
// =================================================================================================
// ROTG
-StatusCode CLBlastSrotg(cl_mem SA_buffer, const size_t SA_offset,
- cl_mem SB_buffer, const size_t SB_offset,
- cl_mem C_buffer, const size_t C_offset,
- cl_mem S_buffer, const size_t S_offset,
- cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Rotg<float>(SA_buffer, SA_offset,
- SB_buffer, SB_offset,
- C_buffer, C_offset,
- S_buffer, S_offset,
+StatusCode CLBlastSrotg(cl_mem sa_buffer, const size_t sa_offset,
+ cl_mem sb_buffer, const size_t sb_offset,
+ cl_mem sc_buffer, const size_t sc_offset,
+ cl_mem ss_buffer, const size_t ss_offset,
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Rotg<float>(sa_buffer, sa_offset,
+ sb_buffer, sb_offset,
+ sc_buffer, sc_offset,
+ ss_buffer, ss_offset,
queue, event);
return static_cast<StatusCode>(status);
}
-StatusCode CLBlastDrotg(cl_mem SA_buffer, const size_t SA_offset,
- cl_mem SB_buffer, const size_t SB_offset,
- cl_mem C_buffer, const size_t C_offset,
- cl_mem S_buffer, const size_t S_offset,
+StatusCode CLBlastDrotg(cl_mem sa_buffer, const size_t sa_offset,
+ cl_mem sb_buffer, const size_t sb_offset,
+ cl_mem sc_buffer, const size_t sc_offset,
+ cl_mem ss_buffer, const size_t ss_offset,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Rotg<double>(SA_buffer, SA_offset,
- SB_buffer, SB_offset,
- C_buffer, C_offset,
- S_buffer, S_offset,
+ auto status = clblast::Rotg<double>(sa_buffer, sa_offset,
+ sb_buffer, sb_offset,
+ sc_buffer, sc_offset,
+ ss_buffer, ss_offset,
queue, event);
return static_cast<StatusCode>(status);
}
+// ROTMG
+StatusCode CLBlastSrotmg(cl_mem sd1_buffer, const size_t sd1_offset,
+ cl_mem sd2_buffer, const size_t sd2_offset,
+ cl_mem sx1_buffer, const size_t sx1_offset,
+ cl_mem sy1_buffer, const size_t sy1_offset,
+ cl_mem sparam_buffer, const size_t sparam_offset,
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Rotmg<float>(sd1_buffer, sd1_offset,
+ sd2_buffer, sd2_offset,
+ sx1_buffer, sx1_offset,
+ sy1_buffer, sy1_offset,
+ sparam_buffer, sparam_offset,
+ queue, event);
+ return static_cast<StatusCode>(status);
+}
+StatusCode CLBlastDrotmg(cl_mem sd1_buffer, const size_t sd1_offset,
+ cl_mem sd2_buffer, const size_t sd2_offset,
+ cl_mem sx1_buffer, const size_t sx1_offset,
+ cl_mem sy1_buffer, const size_t sy1_offset,
+ cl_mem sparam_buffer, const size_t sparam_offset,
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Rotmg<double>(sd1_buffer, sd1_offset,
+ sd2_buffer, sd2_offset,
+ sx1_buffer, sx1_offset,
+ sy1_buffer, sy1_offset,
+ sparam_buffer, sparam_offset,
+ queue, event);
+ return static_cast<StatusCode>(status);
+}
+
// ROT
StatusCode CLBlastSrot(const size_t n,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
- const float C,
- const float S,
+ const float cos,
+ const float sin,
cl_command_queue* queue, cl_event* event) {
auto status = clblast::Rot(n,
x_buffer, x_offset, x_inc,
y_buffer, y_offset, y_inc,
- C,
- S,
+ cos,
+ sin,
queue, event);
return static_cast<StatusCode>(status);
}
StatusCode CLBlastDrot(const size_t n,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
- const double C,
- const double S,
+ const double cos,
+ const double sin,
cl_command_queue* queue, cl_event* event) {
auto status = clblast::Rot(n,
x_buffer, x_offset, x_inc,
y_buffer, y_offset, y_inc,
- C,
- S,
+ cos,
+ sin,
queue, event);
return static_cast<StatusCode>(status);
}
+// ROTM
+StatusCode CLBlastSrotm(const size_t n,
+ cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_mem sparam_buffer, const size_t sparam_offset,
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Rotm<float>(n,
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ sparam_buffer, sparam_offset,
+ queue, event);
+ return static_cast<StatusCode>(status);
+}
+StatusCode CLBlastDrotm(const size_t n,
+ cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_mem sparam_buffer, const size_t sparam_offset,
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Rotm<double>(n,
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ sparam_buffer, sparam_offset,
+ queue, event);
+ return static_cast<StatusCode>(status);
+}
+
// SWAP
StatusCode CLBlastSswap(const size_t n,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
diff --git a/test/correctness/routines/level1/xrotm.cc b/test/correctness/routines/level1/xrotm.cc
new file mode 100644
index 00000000..869056ef
--- /dev/null
+++ b/test/correctness/routines/level1/xrotm.cc
@@ -0,0 +1,26 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// =================================================================================================
+
+#include "correctness/testblas.h"
+#include "routines/level1/xrotm.h"
+
+// Shortcuts to the clblast namespace
+using float2 = clblast::float2;
+using double2 = clblast::double2;
+
+// Main function (not within the clblast namespace)
+int main(int argc, char *argv[]) {
+ clblast::RunTests<clblast::TestXrotm<float>, float, float>(argc, argv, false, "SROTM");
+ clblast::RunTests<clblast::TestXrotm<double>, double, double>(argc, argv, true, "DROTM");
+ return 0;
+}
+
+// =================================================================================================
diff --git a/test/correctness/routines/level1/xrotmg.cc b/test/correctness/routines/level1/xrotmg.cc
new file mode 100644
index 00000000..29f8b0e1
--- /dev/null
+++ b/test/correctness/routines/level1/xrotmg.cc
@@ -0,0 +1,26 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// =================================================================================================
+
+#include "correctness/testblas.h"
+#include "routines/level1/xrotmg.h"
+
+// Shortcuts to the clblast namespace
+using float2 = clblast::float2;
+using double2 = clblast::double2;
+
+// Main function (not within the clblast namespace)
+int main(int argc, char *argv[]) {
+ clblast::RunTests<clblast::TestXrotmg<float>, float, float>(argc, argv, false, "SROTMG");
+ clblast::RunTests<clblast::TestXrotmg<double>, double, double>(argc, argv, true, "DROTMG");
+ return 0;
+}
+
+// =================================================================================================
diff --git a/test/performance/routines/level1/xrotm.cc b/test/performance/routines/level1/xrotm.cc
new file mode 100644
index 00000000..7af94d0f
--- /dev/null
+++ b/test/performance/routines/level1/xrotm.cc
@@ -0,0 +1,33 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// =================================================================================================
+
+#include "performance/client.h"
+#include "routines/level1/xrotm.h"
+
+// Shortcuts to the clblast namespace
+using float2 = clblast::float2;
+using double2 = clblast::double2;
+
+// Main function (not within the clblast namespace)
+int main(int argc, char *argv[]) {
+ switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
+ case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
+ case clblast::Precision::kSingle:
+ clblast::RunClient<clblast::TestXrotm<float>, float, float>(argc, argv); break;
+ case clblast::Precision::kDouble:
+ clblast::RunClient<clblast::TestXrotm<double>, double, double>(argc, argv); break;
+ case clblast::Precision::kComplexSingle: throw std::runtime_error("Unsupported precision mode");
+ case clblast::Precision::kComplexDouble: throw std::runtime_error("Unsupported precision mode");
+ }
+ return 0;
+}
+
+// =================================================================================================
diff --git a/test/performance/routines/level1/xrotmg.cc b/test/performance/routines/level1/xrotmg.cc
new file mode 100644
index 00000000..a326347b
--- /dev/null
+++ b/test/performance/routines/level1/xrotmg.cc
@@ -0,0 +1,33 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// =================================================================================================
+
+#include "performance/client.h"
+#include "routines/level1/xrotmg.h"
+
+// Shortcuts to the clblast namespace
+using float2 = clblast::float2;
+using double2 = clblast::double2;
+
+// Main function (not within the clblast namespace)
+int main(int argc, char *argv[]) {
+ switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
+ case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
+ case clblast::Precision::kSingle:
+ clblast::RunClient<clblast::TestXrotmg<float>, float, float>(argc, argv); break;
+ case clblast::Precision::kDouble:
+ clblast::RunClient<clblast::TestXrotmg<double>, double, double>(argc, argv); break;
+ case clblast::Precision::kComplexSingle: throw std::runtime_error("Unsupported precision mode");
+ case clblast::Precision::kComplexDouble: throw std::runtime_error("Unsupported precision mode");
+ }
+ return 0;
+}
+
+// =================================================================================================
diff --git a/test/wrapper_clblas.h b/test/wrapper_clblas.h
index 553e3e66..259aa27c 100644
--- a/test/wrapper_clblas.h
+++ b/test/wrapper_clblas.h
@@ -27,69 +27,144 @@ namespace clblast {
// Forwards the clBLAS calls for SROTG/DROTG
template <typename T>
-clblasStatus clblasXrotg(cl_mem SA_buffer, const size_t SA_offset,
- cl_mem SB_buffer, const size_t SB_offset,
- cl_mem C_buffer, const size_t C_offset,
- cl_mem S_buffer, const size_t S_offset,
+clblasStatus clblasXrotg(cl_mem sa_buffer, const size_t sa_offset,
+ cl_mem sb_buffer, const size_t sb_offset,
+ cl_mem sc_buffer, const size_t sc_offset,
+ cl_mem ss_buffer, const size_t ss_offset,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events);
template <>
-clblasStatus clblasXrotg<float>(cl_mem SA_buffer, const size_t SA_offset,
- cl_mem SB_buffer, const size_t SB_offset,
- cl_mem C_buffer, const size_t C_offset,
- cl_mem S_buffer, const size_t S_offset,
+clblasStatus clblasXrotg<float>(cl_mem sa_buffer, const size_t sa_offset,
+ cl_mem sb_buffer, const size_t sb_offset,
+ cl_mem sc_buffer, const size_t sc_offset,
+ cl_mem ss_buffer, const size_t ss_offset,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
- return clblasSrotg(SA_buffer, SA_offset,
- SB_buffer, SB_offset,
- C_buffer, C_offset,
- S_buffer, S_offset,
+ return clblasSrotg(sa_buffer, sa_offset,
+ sb_buffer, sb_offset,
+ sc_buffer, sc_offset,
+ ss_buffer, ss_offset,
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
-clblasStatus clblasXrotg<double>(cl_mem SA_buffer, const size_t SA_offset,
- cl_mem SB_buffer, const size_t SB_offset,
- cl_mem C_buffer, const size_t C_offset,
- cl_mem S_buffer, const size_t S_offset,
+clblasStatus clblasXrotg<double>(cl_mem sa_buffer, const size_t sa_offset,
+ cl_mem sb_buffer, const size_t sb_offset,
+ cl_mem sc_buffer, const size_t sc_offset,
+ cl_mem ss_buffer, const size_t ss_offset,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
- return clblasDrotg(SA_buffer, SA_offset,
- SB_buffer, SB_offset,
- C_buffer, C_offset,
- S_buffer, S_offset,
+ return clblasDrotg(sa_buffer, sa_offset,
+ sb_buffer, sb_offset,
+ sc_buffer, sc_offset,
+ ss_buffer, ss_offset,
num_queues, queues, num_wait_events, wait_events, events);
}
+// Forwards the clBLAS calls for SROTMG/DROTMG
+template <typename T>
+clblasStatus clblasXrotmg(cl_mem sd1_buffer, const size_t sd1_offset,
+ cl_mem sd2_buffer, const size_t sd2_offset,
+ cl_mem sx1_buffer, const size_t sx1_offset,
+ cl_mem sy1_buffer, const size_t sy1_offset,
+ cl_mem sparam_buffer, const size_t sparam_offset,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events);
+template <>
+clblasStatus clblasXrotmg<float>(cl_mem sd1_buffer, const size_t sd1_offset,
+ cl_mem sd2_buffer, const size_t sd2_offset,
+ cl_mem sx1_buffer, const size_t sx1_offset,
+ cl_mem sy1_buffer, const size_t sy1_offset,
+ cl_mem sparam_buffer, const size_t sparam_offset,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ return clblasSrotmg(sd1_buffer, sd1_offset,
+ sd2_buffer, sd2_offset,
+ sx1_buffer, sx1_offset,
+ sy1_buffer, sy1_offset,
+ sparam_buffer, sparam_offset,
+ num_queues, queues, num_wait_events, wait_events, events);
+}
+template <>
+clblasStatus clblasXrotmg<double>(cl_mem sd1_buffer, const size_t sd1_offset,
+ cl_mem sd2_buffer, const size_t sd2_offset,
+ cl_mem sx1_buffer, const size_t sx1_offset,
+ cl_mem sy1_buffer, const size_t sy1_offset,
+ cl_mem sparam_buffer, const size_t sparam_offset,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ return clblasDrotmg(sd1_buffer, sd1_offset,
+ sd2_buffer, sd2_offset,
+ sx1_buffer, sx1_offset,
+ sy1_buffer, sy1_offset,
+ sparam_buffer, sparam_offset,
+ num_queues, queues, num_wait_events, wait_events, events);
+}
+
// Forwards the clBLAS calls for SROT/DROT
clblasStatus clblasXrot(const size_t n,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
- const float C,
- const float S,
+ const float cos,
+ const float sin,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasSrot(n,
x_buffer, x_offset, static_cast<int>(x_inc),
y_buffer, y_offset, static_cast<int>(y_inc),
- C,
- S,
+ cos,
+ sin,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXrot(const size_t n,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
- const double C,
- const double S,
+ const double cos,
+ const double sin,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasDrot(n,
x_buffer, x_offset, static_cast<int>(x_inc),
y_buffer, y_offset, static_cast<int>(y_inc),
- C,
- S,
+ cos,
+ sin,
num_queues, queues, num_wait_events, wait_events, events);
}
+// Forwards the clBLAS calls for SROTM/DROTM
+template <typename T>
+clblasStatus clblasXrotm(const size_t n,
+ cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_mem sparam_buffer, const size_t sparam_offset,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events);
+template <>
+clblasStatus clblasXrotm<float>(const size_t n,
+ cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_mem sparam_buffer, const size_t sparam_offset,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ return clblasSrotm(n,
+ x_buffer, x_offset, static_cast<int>(x_inc),
+ y_buffer, y_offset, static_cast<int>(y_inc),
+ sparam_buffer, sparam_offset,
+ num_queues, queues, num_wait_events, wait_events, events);
+}
+template <>
+clblasStatus clblasXrotm<double>(const size_t n,
+ cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_mem sparam_buffer, const size_t sparam_offset,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ return clblasDrotm(n,
+ x_buffer, x_offset, static_cast<int>(x_inc),
+ y_buffer, y_offset, static_cast<int>(y_inc),
+ sparam_buffer, sparam_offset,
+ num_queues, queues, num_wait_events, wait_events, events);
+}
+
// Forwards the clBLAS calls for SSWAP/DSWAP/CSWAP/ZSWAP
template <typename T>
clblasStatus clblasXswap(const size_t n,