summary refs log tree commit diff
path: root/src/tuning
diff options
context:
space:
mode:
author CNugteren <web@cedricnugteren.nl> 2015-09-14 15:53:34 +0200
committer CNugteren <web@cedricnugteren.nl> 2015-09-14 15:53:34 +0200
commit 2a383f34501b386b8e6c4beb56c6ac694622f060 (patch)
tree 782fa45888677b9fe0a1e12f85c6f795308935f2 /src/tuning
parent e0c5312abb6f5b1c0e413fe69a8c7c46215cd3ae (diff)
Added extra temporary buffer to tuners in preparation of Xdot routines
Diffstat (limited to 'src/tuning')
-rw-r--r-- src/tuning/copy.cc 5
-rw-r--r-- src/tuning/pad.cc 5
-rw-r--r-- src/tuning/padtranspose.cc 5
-rw-r--r-- src/tuning/transpose.cc 5
-rw-r--r-- src/tuning/xaxpy.cc 9
-rw-r--r-- src/tuning/xgemm.cc 5
-rw-r--r-- src/tuning/xgemv.cc 5
7 files changed, 30 insertions, 9 deletions
diff --git a/src/tuning/copy.cc b/src/tuning/copy.cc
index f38a28f3..23828b25 100644
--- a/src/tuning/copy.cc
+++ b/src/tuning/copy.cc
@@ -53,6 +53,7 @@ class TuneCopy {
static size_t GetSizeA(const Arguments<T> &args) { return args.m * args.n; }
static size_t GetSizeB(const Arguments<T> &args) { return args.m * args.n; }
static size_t GetSizeC(const Arguments<T> &) { return 1; } // N/A for this kernel
+ static size_t GetSizeTemp(const Arguments<T> &) { return 1; } // N/A for this kernel
// Sets the tuning parameters and their possible values
static void SetParameters(cltune::Tuner &tuner, const size_t id) {
@@ -68,6 +69,7 @@ class TuneCopy {
// Sets the base thread configuration
static std::vector<size_t> GlobalSize(const Arguments<T> &args) { return {args.m, args.n}; }
+ static std::vector<size_t> GlobalSizeRef(const Arguments<T> &args) { return GlobalSize(args); }
static std::vector<size_t> LocalSize() { return {1, 1}; }
static std::vector<size_t> LocalSizeRef() { return {8, 8}; }
@@ -81,7 +83,8 @@ class TuneCopy {
// Sets the kernel's arguments
static void SetArguments(cltune::Tuner &tuner, const Arguments<T> &args,
std::vector<T> &, std::vector<T> &,
- std::vector<T> &a_mat, std::vector<T> &b_mat, std::vector<T> &) {
+ std::vector<T> &a_mat, std::vector<T> &b_mat, std::vector<T> &,
+ std::vector<T> &) {
tuner.AddArgumentScalar(static_cast<int>(args.m));
tuner.AddArgumentInput(a_mat);
tuner.AddArgumentOutput(b_mat);
diff --git a/src/tuning/pad.cc b/src/tuning/pad.cc
index 2ce566fb..6a826b6b 100644
--- a/src/tuning/pad.cc
+++ b/src/tuning/pad.cc
@@ -53,6 +53,7 @@ class TunePad {
static size_t GetSizeA(const Arguments<T> &args) { return args.m * args.n; }
static size_t GetSizeB(const Arguments<T> &args) { return args.m * args.n; }
static size_t GetSizeC(const Arguments<T> &) { return 1; } // N/A for this kernel
+ static size_t GetSizeTemp(const Arguments<T> &) { return 1; } // N/A for this kernel
// Sets the tuning parameters and their possible values
static void SetParameters(cltune::Tuner &tuner, const size_t id) {
@@ -68,6 +69,7 @@ class TunePad {
// Sets the base thread configuration
static std::vector<size_t> GlobalSize(const Arguments<T> &args) { return {args.m, args.n}; }
+ static std::vector<size_t> GlobalSizeRef(const Arguments<T> &args) { return GlobalSize(args); }
static std::vector<size_t> LocalSize() { return {1, 1}; }
static std::vector<size_t> LocalSizeRef() { return {8, 8}; }
@@ -81,7 +83,8 @@ class TunePad {
// Sets the kernel's arguments
static void SetArguments(cltune::Tuner &tuner, const Arguments<T> &args,
std::vector<T> &, std::vector<T> &,
- std::vector<T> &a_mat, std::vector<T> &b_mat, std::vector<T> &) {
+ std::vector<T> &a_mat, std::vector<T> &b_mat, std::vector<T> &,
+ std::vector<T> &) {
tuner.AddArgumentScalar(static_cast<int>(args.m));
tuner.AddArgumentScalar(static_cast<int>(args.n));
tuner.AddArgumentScalar(static_cast<int>(args.m));
diff --git a/src/tuning/padtranspose.cc b/src/tuning/padtranspose.cc
index 8d494745..3f233809 100644
--- a/src/tuning/padtranspose.cc
+++ b/src/tuning/padtranspose.cc
@@ -53,6 +53,7 @@ class TunePadTranspose {
static size_t GetSizeA(const Arguments<T> &args) { return args.m * args.n; }
static size_t GetSizeB(const Arguments<T> &args) { return args.m * args.n; }
static size_t GetSizeC(const Arguments<T> &) { return 1; } // N/A for this kernel
+ static size_t GetSizeTemp(const Arguments<T> &) { return 1; } // N/A for this kernel
// Sets the tuning parameters and their possible values
static void SetParameters(cltune::Tuner &tuner, const size_t id) {
@@ -72,6 +73,7 @@ class TunePadTranspose {
// Sets the base thread configuration
static std::vector<size_t> GlobalSize(const Arguments<T> &args) { return {args.m, args.n}; }
+ static std::vector<size_t> GlobalSizeRef(const Arguments<T> &args) { return GlobalSize(args); }
static std::vector<size_t> LocalSize() { return {1, 1}; }
static std::vector<size_t> LocalSizeRef() { return {8, 8}; }
@@ -85,7 +87,8 @@ class TunePadTranspose {
// Sets the kernel's arguments
static void SetArguments(cltune::Tuner &tuner, const Arguments<T> &args,
std::vector<T> &, std::vector<T> &,
- std::vector<T> &a_mat, std::vector<T> &b_mat, std::vector<T> &) {
+ std::vector<T> &a_mat, std::vector<T> &b_mat, std::vector<T> &,
+ std::vector<T> &) {
tuner.AddArgumentScalar(static_cast<int>(args.m));
tuner.AddArgumentScalar(static_cast<int>(args.n));
tuner.AddArgumentScalar(static_cast<int>(args.m));
diff --git a/src/tuning/transpose.cc b/src/tuning/transpose.cc
index 2ffdb7aa..3998ba66 100644
--- a/src/tuning/transpose.cc
+++ b/src/tuning/transpose.cc
@@ -53,6 +53,7 @@ class TuneTranspose {
static size_t GetSizeA(const Arguments<T> &args) { return args.m * args.n; }
static size_t GetSizeB(const Arguments<T> &args) { return args.m * args.n; }
static size_t GetSizeC(const Arguments<T> &) { return 1; } // N/A for this kernel
+ static size_t GetSizeTemp(const Arguments<T> &) { return 1; } // N/A for this kernel
// Sets the tuning parameters and their possible values
static void SetParameters(cltune::Tuner &tuner, const size_t id) {
@@ -73,6 +74,7 @@ class TuneTranspose {
// Sets the base thread configuration
static std::vector<size_t> GlobalSize(const Arguments<T> &args) { return {args.m, args.n}; }
+ static std::vector<size_t> GlobalSizeRef(const Arguments<T> &args) { return GlobalSize(args); }
static std::vector<size_t> LocalSize() { return {1, 1}; }
static std::vector<size_t> LocalSizeRef() { return {8, 8}; }
@@ -86,7 +88,8 @@ class TuneTranspose {
// Sets the kernel's arguments
static void SetArguments(cltune::Tuner &tuner, const Arguments<T> &args,
std::vector<T> &, std::vector<T> &,
- std::vector<T> &a_mat, std::vector<T> &b_mat, std::vector<T> &) {
+ std::vector<T> &a_mat, std::vector<T> &b_mat, std::vector<T> &,
+ std::vector<T> &) {
tuner.AddArgumentScalar(static_cast<int>(args.m));
tuner.AddArgumentInput(a_mat);
tuner.AddArgumentOutput(b_mat);
diff --git a/src/tuning/xaxpy.cc b/src/tuning/xaxpy.cc
index 7715b128..31aa6a8e 100644
--- a/src/tuning/xaxpy.cc
+++ b/src/tuning/xaxpy.cc
@@ -53,11 +53,12 @@ class TuneXaxpy {
static double DefaultFraction() { return 1.0; } // N/A for this kernel
// Describes how to obtain the sizes of the buffers
- static size_t GetSizeX(const Arguments<T> &args) { return args.n; } // N/A for this kernel
- static size_t GetSizeY(const Arguments<T> &args) { return args.n; } // N/A for this kernel
+ static size_t GetSizeX(const Arguments<T> &args) { return args.n; }
+ static size_t GetSizeY(const Arguments<T> &args) { return args.n; }
static size_t GetSizeA(const Arguments<T> &) { return 1; } // N/A for this kernel
static size_t GetSizeB(const Arguments<T> &) { return 1; } // N/A for this kernel
static size_t GetSizeC(const Arguments<T> &) { return 1; } // N/A for this kernel
+ static size_t GetSizeTemp(const Arguments<T> &) { return 1; } // N/A for this kernel
// Sets the tuning parameters and their possible values
static void SetParameters(cltune::Tuner &tuner, const size_t id) {
@@ -72,6 +73,7 @@ class TuneXaxpy {
// Sets the base thread configuration
static std::vector<size_t> GlobalSize(const Arguments<T> &args) { return {args.n}; }
+ static std::vector<size_t> GlobalSizeRef(const Arguments<T> &args) { return GlobalSize(args); }
static std::vector<size_t> LocalSize() { return {1}; }
static std::vector<size_t> LocalSizeRef() { return {64}; }
@@ -85,7 +87,8 @@ class TuneXaxpy {
// Sets the kernel's arguments
static void SetArguments(cltune::Tuner &tuner, const Arguments<T> &args,
std::vector<T> &x_vec, std::vector<T> &y_vec,
- std::vector<T> &, std::vector<T> &, std::vector<T> &) {
+ std::vector<T> &, std::vector<T> &, std::vector<T> &,
+ std::vector<T> &) {
tuner.AddArgumentScalar(static_cast<int>(args.n));
tuner.AddArgumentScalar(args.alpha);
tuner.AddArgumentInput(x_vec);
diff --git a/src/tuning/xgemm.cc b/src/tuning/xgemm.cc
index 302f2bd5..e820cfb0 100644
--- a/src/tuning/xgemm.cc
+++ b/src/tuning/xgemm.cc
@@ -55,6 +55,7 @@ class TuneXgemm {
static size_t GetSizeA(const Arguments<T> &args) { return args.m * args.k; }
static size_t GetSizeB(const Arguments<T> &args) { return args.n * args.k; }
static size_t GetSizeC(const Arguments<T> &args) { return args.m * args.n; }
+ static size_t GetSizeTemp(const Arguments<T> &) { return 1; } // N/A for this kernel
// Sets the tuning parameters and their possible values
static void SetParameters(cltune::Tuner &tuner, const size_t id) {
@@ -103,6 +104,7 @@ class TuneXgemm {
// Sets the base thread configuration
static std::vector<size_t> GlobalSize(const Arguments<T> &args) { return {args.m, args.n}; }
+ static std::vector<size_t> GlobalSizeRef(const Arguments<T> &args) { return GlobalSize(args); }
static std::vector<size_t> LocalSize() { return {1, 1}; }
static std::vector<size_t> LocalSizeRef() { return {8, 8}; }
@@ -116,7 +118,8 @@ class TuneXgemm {
// Sets the kernel's arguments
static void SetArguments(cltune::Tuner &tuner, const Arguments<T> &args,
std::vector<T> &, std::vector<T> &,
- std::vector<T> &a_mat, std::vector<T> &b_mat, std::vector<T> &c_mat) {
+ std::vector<T> &a_mat, std::vector<T> &b_mat, std::vector<T> &c_mat,
+ std::vector<T> &) {
tuner.AddArgumentScalar(static_cast<int>(args.m));
tuner.AddArgumentScalar(static_cast<int>(args.n));
tuner.AddArgumentScalar(static_cast<int>(args.k));
diff --git a/src/tuning/xgemv.cc b/src/tuning/xgemv.cc
index e22b5103..3d6fe595 100644
--- a/src/tuning/xgemv.cc
+++ b/src/tuning/xgemv.cc
@@ -56,6 +56,7 @@ class TuneXgemv {
static size_t GetSizeA(const Arguments<T> &args) { return args.m * args.n; }
static size_t GetSizeB(const Arguments<T> &) { return 1; } // N/A for this kernel
static size_t GetSizeC(const Arguments<T> &) { return 1; } // N/A for this kernel
+ static size_t GetSizeTemp(const Arguments<T> &) { return 1; } // N/A for this kernel
// Sets the tuning parameters and their possible values
static void SetParameters(cltune::Tuner &tuner, const size_t id) {
@@ -75,6 +76,7 @@ class TuneXgemv {
// Sets the base thread configuration
static std::vector<size_t> GlobalSize(const Arguments<T> &args) { return {args.m}; }
+ static std::vector<size_t> GlobalSizeRef(const Arguments<T> &args) { return GlobalSize(args); }
static std::vector<size_t> LocalSize() { return {1}; }
static std::vector<size_t> LocalSizeRef() { return {64}; }
@@ -88,7 +90,8 @@ class TuneXgemv {
// Sets the kernel's arguments
static void SetArguments(cltune::Tuner &tuner, const Arguments<T> &args,
std::vector<T> &x_vec, std::vector<T> &y_vec,
- std::vector<T> &a_mat, std::vector<T> &, std::vector<T> &) {
+ std::vector<T> &a_mat, std::vector<T> &, std::vector<T> &,
+ std::vector<T> &) {
auto a_rotated = (V==3) ? 1 : 0;
tuner.AddArgumentScalar(static_cast<int>(args.m));
tuner.AddArgumentScalar(static_cast<int>(args.n));