summary | refs | log | tree | commit | diff
path: root/src/routines
diff options
context:
space:
mode:
author: Cedric Nugteren <web@cedricnugteren.nl> 2016-07-16 10:56:37 +0200
committer: Cedric Nugteren <web@cedricnugteren.nl> 2016-07-16 10:56:37 +0200
commit: 066af4069ba5c92decc7652e5c6d36c27849ccab (patch)
tree: 6e86ca648d9b59acf920255b66ebe57106935d9a /src/routines
parent: c87e877bf23d2fe38a7da2898e1734a3cdeaf48c (diff)
Removed an unused variable from the copy-transpose-pad function
Diffstat (limited to 'src/routines')
-rw-r--r-- src/routines/common.hpp 2
-rw-r--r-- src/routines/level3/xgemm.cpp 8
-rw-r--r-- src/routines/level3/xher2k.cpp 12
-rw-r--r-- src/routines/level3/xherk.cpp 8
-rw-r--r-- src/routines/level3/xsyr2k.cpp 8
-rw-r--r-- src/routines/level3/xsyrk.cpp 6
-rw-r--r-- src/routines/levelx/xomatcopy.cpp 2
7 files changed, 23 insertions, 23 deletions
diff --git a/src/routines/common.hpp b/src/routines/common.hpp
index e624a2b1..d53bdc25 100644
--- a/src/routines/common.hpp
+++ b/src/routines/common.hpp
@@ -41,7 +41,7 @@ StatusCode RunKernel(Kernel &kernel, Queue &queue, const Device &device,
// Copies or transposes a matrix and optionally pads/unpads it with zeros. This method is also able
// to write to symmetric and triangular matrices through optional arguments.
template <typename T>
-StatusCode PadCopyTransposeMatrix(Queue &queue, const Device &device, const Context &context,
+StatusCode PadCopyTransposeMatrix(Queue &queue, const Device &device,
const Database &db,
EventPointer event, std::vector<Event>& waitForEvents,
const size_t src_one, const size_t src_two,
diff --git a/src/routines/level3/xgemm.cpp b/src/routines/level3/xgemm.cpp
index 97e8db7e..0db28537 100644
--- a/src/routines/level3/xgemm.cpp
+++ b/src/routines/level3/xgemm.cpp
@@ -127,7 +127,7 @@ StatusCode Xgemm<T>::DoGemm(const Layout layout,
// case nothing has to be done, these kernels can be skipped.
if (!a_no_temp) {
auto eventProcessA = Event();
- status = PadCopyTransposeMatrix(queue_, device_, context_, db_, eventProcessA.pointer(), emptyEventList,
+ status = PadCopyTransposeMatrix(queue_, device_, db_, eventProcessA.pointer(), emptyEventList,
a_one, a_two, a_ld, a_offset, a_buffer,
m_ceiled, k_ceiled, m_ceiled, 0, a_temp,
ConstantOne<T>(), program,
@@ -139,7 +139,7 @@ StatusCode Xgemm<T>::DoGemm(const Layout layout,
// As above, but now for matrix B
if (!b_no_temp) {
auto eventProcessB = Event();
- status = PadCopyTransposeMatrix(queue_, device_, context_, db_, eventProcessB.pointer(), emptyEventList,
+ status = PadCopyTransposeMatrix(queue_, device_, db_, eventProcessB.pointer(), emptyEventList,
b_one, b_two, b_ld, b_offset, b_buffer,
n_ceiled, k_ceiled, n_ceiled, 0, b_temp,
ConstantOne<T>(), program,
@@ -151,7 +151,7 @@ StatusCode Xgemm<T>::DoGemm(const Layout layout,
// As above, but now for matrix C. This is only necessary if C is used both as input and output.
if (!c_no_temp && beta != static_cast<T>(0)) {
auto eventProcessC = Event();
- status = PadCopyTransposeMatrix(queue_, device_, context_, db_, eventProcessC.pointer(), emptyEventList,
+ status = PadCopyTransposeMatrix(queue_, device_, db_, eventProcessC.pointer(), emptyEventList,
c_one, c_two, c_ld, c_offset, c_buffer,
m_ceiled, n_ceiled, m_ceiled, 0, c_temp,
ConstantOne<T>(), program,
@@ -190,7 +190,7 @@ StatusCode Xgemm<T>::DoGemm(const Layout layout,
// Runs the post-processing kernel if needed
if (!c_no_temp) {
eventWaitList.push_back(eventKernel);
- status = PadCopyTransposeMatrix(queue_, device_, context_, db_, event_, eventWaitList,
+ status = PadCopyTransposeMatrix(queue_, device_, db_, event_, eventWaitList,
m_ceiled, n_ceiled, m_ceiled, 0, c_temp,
c_one, c_two, c_ld, c_offset, c_buffer,
ConstantOne<T>(), program,
diff --git a/src/routines/level3/xher2k.cpp b/src/routines/level3/xher2k.cpp
index 65e2be55..1ba6080f 100644
--- a/src/routines/level3/xher2k.cpp
+++ b/src/routines/level3/xher2k.cpp
@@ -119,7 +119,7 @@ StatusCode Xher2k<T,U>::DoHer2k(const Layout layout, const Triangle triangle, co
// case nothing has to be done, these kernels can be skipped.
if (!a1_no_temp) {
auto eventProcessA1 = Event();
- status = PadCopyTransposeMatrix(queue_, device_, context_, db_, eventProcessA1.pointer(), emptyEventList,
+ status = PadCopyTransposeMatrix(queue_, device_, db_, eventProcessA1.pointer(), emptyEventList,
ab_one, ab_two, a_ld, a_offset, a_buffer,
n_ceiled, k_ceiled, n_ceiled, 0, a1_temp,
ConstantOne<T>(), program,
@@ -129,7 +129,7 @@ StatusCode Xher2k<T,U>::DoHer2k(const Layout layout, const Triangle triangle, co
}
if (!a2_no_temp) {
auto eventProcessA2 = Event();
- status = PadCopyTransposeMatrix(queue_, device_, context_, db_, eventProcessA2.pointer(), emptyEventList,
+ status = PadCopyTransposeMatrix(queue_, device_, db_, eventProcessA2.pointer(), emptyEventList,
ab_one, ab_two, a_ld, a_offset, a_buffer,
n_ceiled, k_ceiled, n_ceiled, 0, a2_temp,
ConstantOne<T>(), program,
@@ -139,7 +139,7 @@ StatusCode Xher2k<T,U>::DoHer2k(const Layout layout, const Triangle triangle, co
}
if (!b1_no_temp) {
auto eventProcessB1 = Event();
- status = PadCopyTransposeMatrix(queue_, device_, context_, db_, eventProcessB1.pointer(), emptyEventList,
+ status = PadCopyTransposeMatrix(queue_, device_, db_, eventProcessB1.pointer(), emptyEventList,
ab_one, ab_two, b_ld, b_offset, b_buffer,
n_ceiled, k_ceiled, n_ceiled, 0, b1_temp,
ConstantOne<T>(), program,
@@ -149,7 +149,7 @@ StatusCode Xher2k<T,U>::DoHer2k(const Layout layout, const Triangle triangle, co
}
if (!b2_no_temp) {
auto eventProcessB2 = Event();
- status = PadCopyTransposeMatrix(queue_, device_, context_, db_, eventProcessB2.pointer(), emptyEventList,
+ status = PadCopyTransposeMatrix(queue_, device_, db_, eventProcessB2.pointer(), emptyEventList,
ab_one, ab_two, b_ld, b_offset, b_buffer,
n_ceiled, k_ceiled, n_ceiled, 0, b2_temp,
ConstantOne<T>(), program,
@@ -161,7 +161,7 @@ StatusCode Xher2k<T,U>::DoHer2k(const Layout layout, const Triangle triangle, co
// Furthermore, also creates a (possibly padded) copy of matrix C, since it is not allowed to
// modify the other triangle.
auto eventProcessC = Event();
- status = PadCopyTransposeMatrix(queue_, device_, context_, db_, eventProcessC.pointer(), emptyEventList,
+ status = PadCopyTransposeMatrix(queue_, device_, db_, eventProcessC.pointer(), emptyEventList,
n, n, c_ld, c_offset, c_buffer,
n_ceiled, n_ceiled, n_ceiled, 0, c_temp,
ConstantOne<T>(), program,
@@ -212,7 +212,7 @@ StatusCode Xher2k<T,U>::DoHer2k(const Layout layout, const Triangle triangle, co
// Runs the post-processing kernel
auto upper = (triangle == Triangle::kUpper);
auto lower = (triangle == Triangle::kLower);
- status = PadCopyTransposeMatrix(queue_, device_, context_, db_, event_, eventWaitList,
+ status = PadCopyTransposeMatrix(queue_, device_, db_, event_, eventWaitList,
n_ceiled, n_ceiled, n_ceiled, 0, c_temp,
n, n, c_ld, c_offset, c_buffer,
ConstantOne<T>(), program,
diff --git a/src/routines/level3/xherk.cpp b/src/routines/level3/xherk.cpp
index cc87e3e9..0fa1b7b1 100644
--- a/src/routines/level3/xherk.cpp
+++ b/src/routines/level3/xherk.cpp
@@ -111,7 +111,7 @@ StatusCode Xherk<T,U>::DoHerk(const Layout layout, const Triangle triangle, cons
// case nothing has to be done, these kernels can be skipped. Two copies are created.
if (!a_no_temp) {
auto eventProcessA = Event();
- status = PadCopyTransposeMatrix(queue_, device_, context_, db_, eventProcessA.pointer(), emptyEventList,
+ status = PadCopyTransposeMatrix(queue_, device_, db_, eventProcessA.pointer(), emptyEventList,
a_one, a_two, a_ld, a_offset, a_buffer,
n_ceiled, k_ceiled, n_ceiled, 0, a_temp,
ConstantOne<T>(), program,
@@ -121,7 +121,7 @@ StatusCode Xherk<T,U>::DoHerk(const Layout layout, const Triangle triangle, cons
}
if (!b_no_temp) {
auto eventProcessB = Event();
- status = PadCopyTransposeMatrix(queue_, device_, context_, db_, eventProcessB.pointer(), emptyEventList,
+ status = PadCopyTransposeMatrix(queue_, device_, db_, eventProcessB.pointer(), emptyEventList,
a_one, a_two, a_ld, a_offset, a_buffer,
n_ceiled, k_ceiled, n_ceiled, 0, b_temp,
ConstantOne<T>(), program,
@@ -133,7 +133,7 @@ StatusCode Xherk<T,U>::DoHerk(const Layout layout, const Triangle triangle, cons
// Furthermore, also creates a (possibly padded) copy of matrix C, since it is not allowed to
// modify the other triangle.
auto eventProcessC = Event();
- status = PadCopyTransposeMatrix(queue_, device_, context_, db_, eventProcessC.pointer(), emptyEventList,
+ status = PadCopyTransposeMatrix(queue_, device_, db_, eventProcessC.pointer(), emptyEventList,
n, n, c_ld, c_offset, c_buffer,
n_ceiled, n_ceiled, n_ceiled, 0, c_temp,
ConstantOne<T>(), program,
@@ -170,7 +170,7 @@ StatusCode Xherk<T,U>::DoHerk(const Layout layout, const Triangle triangle, cons
// Runs the post-processing kernel
auto upper = (triangle == Triangle::kUpper);
auto lower = (triangle == Triangle::kLower);
- status = PadCopyTransposeMatrix(queue_, device_, context_, db_, event_, eventWaitList,
+ status = PadCopyTransposeMatrix(queue_, device_, db_, event_, eventWaitList,
n_ceiled, n_ceiled, n_ceiled, 0, c_temp,
n, n, c_ld, c_offset, c_buffer,
ConstantOne<T>(), program,
diff --git a/src/routines/level3/xsyr2k.cpp b/src/routines/level3/xsyr2k.cpp
index 18a1eac7..5a90a5a2 100644
--- a/src/routines/level3/xsyr2k.cpp
+++ b/src/routines/level3/xsyr2k.cpp
@@ -106,7 +106,7 @@ StatusCode Xsyr2k<T>::DoSyr2k(const Layout layout, const Triangle triangle, cons
// case nothing has to be done, these kernels can be skipped.
if (!a_no_temp) {
auto eventProcessA = Event();
- status = PadCopyTransposeMatrix(queue_, device_, context_, db_, eventProcessA.pointer(), emptyEventList,
+ status = PadCopyTransposeMatrix(queue_, device_, db_, eventProcessA.pointer(), emptyEventList,
ab_one, ab_two, a_ld, a_offset, a_buffer,
n_ceiled, k_ceiled, n_ceiled, 0, a_temp,
ConstantOne<T>(), program,
@@ -116,7 +116,7 @@ StatusCode Xsyr2k<T>::DoSyr2k(const Layout layout, const Triangle triangle, cons
}
if (!b_no_temp) {
auto eventProcessB = Event();
- status = PadCopyTransposeMatrix(queue_, device_, context_, db_, eventProcessB.pointer(), emptyEventList,
+ status = PadCopyTransposeMatrix(queue_, device_, db_, eventProcessB.pointer(), emptyEventList,
ab_one, ab_two, b_ld, b_offset, b_buffer,
n_ceiled, k_ceiled, n_ceiled, 0, b_temp,
ConstantOne<T>(), program,
@@ -128,7 +128,7 @@ StatusCode Xsyr2k<T>::DoSyr2k(const Layout layout, const Triangle triangle, cons
// Furthermore, also creates a (possibly padded) copy of matrix C, since it is not allowed to
// modify the other triangle.
auto eventProcessC = Event();
- status = PadCopyTransposeMatrix(queue_, device_, context_, db_, eventProcessC.pointer(), emptyEventList,
+ status = PadCopyTransposeMatrix(queue_, device_, db_, eventProcessC.pointer(), emptyEventList,
n, n, c_ld, c_offset, c_buffer,
n_ceiled, n_ceiled, n_ceiled, 0, c_temp,
ConstantOne<T>(), program,
@@ -177,7 +177,7 @@ StatusCode Xsyr2k<T>::DoSyr2k(const Layout layout, const Triangle triangle, cons
// Runs the post-processing kernel
auto upper = (triangle == Triangle::kUpper);
auto lower = (triangle == Triangle::kLower);
- status = PadCopyTransposeMatrix(queue_, device_, context_, db_, event_, eventWaitList,
+ status = PadCopyTransposeMatrix(queue_, device_, db_, event_, eventWaitList,
n_ceiled, n_ceiled, n_ceiled, 0, c_temp,
n, n, c_ld, c_offset, c_buffer,
ConstantOne<T>(), program,
diff --git a/src/routines/level3/xsyrk.cpp b/src/routines/level3/xsyrk.cpp
index 1992cec1..46b96b76 100644
--- a/src/routines/level3/xsyrk.cpp
+++ b/src/routines/level3/xsyrk.cpp
@@ -99,7 +99,7 @@ StatusCode Xsyrk<T>::DoSyrk(const Layout layout, const Triangle triangle, const
// case nothing has to be done, these kernels can be skipped.
if (!a_no_temp) {
auto eventProcessA = Event();
- status = PadCopyTransposeMatrix(queue_, device_, context_, db_, eventProcessA.pointer(), emptyEventList,
+ status = PadCopyTransposeMatrix(queue_, device_, db_, eventProcessA.pointer(), emptyEventList,
a_one, a_two, a_ld, a_offset, a_buffer,
n_ceiled, k_ceiled, n_ceiled, 0, a_temp,
ConstantOne<T>(), program,
@@ -111,7 +111,7 @@ StatusCode Xsyrk<T>::DoSyrk(const Layout layout, const Triangle triangle, const
// Furthermore, also creates a (possibly padded) copy of matrix C, since it is not allowed to
// modify the other triangle.
auto eventProcessC = Event();
- status = PadCopyTransposeMatrix(queue_, device_, context_, db_, eventProcessC.pointer(), emptyEventList,
+ status = PadCopyTransposeMatrix(queue_, device_, db_, eventProcessC.pointer(), emptyEventList,
n, n, c_ld, c_offset, c_buffer,
n_ceiled, n_ceiled, n_ceiled, 0, c_temp,
ConstantOne<T>(), program,
@@ -148,7 +148,7 @@ StatusCode Xsyrk<T>::DoSyrk(const Layout layout, const Triangle triangle, const
// Runs the post-processing kernel
auto upper = (triangle == Triangle::kUpper);
auto lower = (triangle == Triangle::kLower);
- status = PadCopyTransposeMatrix(queue_, device_, context_, db_, event_, eventWaitList,
+ status = PadCopyTransposeMatrix(queue_, device_, db_, event_, eventWaitList,
n_ceiled, n_ceiled, n_ceiled, 0, c_temp,
n, n, c_ld, c_offset, c_buffer,
ConstantOne<T>(), program,
diff --git a/src/routines/levelx/xomatcopy.cpp b/src/routines/levelx/xomatcopy.cpp
index e8593301..af9080af 100644
--- a/src/routines/levelx/xomatcopy.cpp
+++ b/src/routines/levelx/xomatcopy.cpp
@@ -72,7 +72,7 @@ StatusCode Xomatcopy<T>::DoOmatcopy(const Layout layout, const Transpose a_trans
const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
auto emptyEventList = std::vector<Event>();
- status = PadCopyTransposeMatrix(queue_, device_, context_, db_, event_, emptyEventList,
+ status = PadCopyTransposeMatrix(queue_, device_, db_, event_, emptyEventList,
a_one, a_two, a_ld, a_offset, a_buffer,
b_one, b_two, b_ld, b_offset, b_buffer,
alpha, program, false, transpose, conjugate);