summary | refs | log | tree | commit | diff
path: root/include
diff options
context:
space:
mode:
Diffstat (limited to 'include')
-rw-r--r-- include/internal/database.h 14
-rw-r--r-- include/internal/database/xger.h 88
-rw-r--r-- include/internal/routines/level2/xger.h 58
-rw-r--r-- include/internal/routines/level2/xgerc.h 46
-rw-r--r-- include/internal/routines/level2/xgeru.h 46
-rw-r--r-- include/internal/routines/level2/xher.h 61
-rw-r--r-- include/internal/routines/level2/xhpr.h 45
-rw-r--r-- include/internal/routines/level2/xspr.h 45
-rw-r--r-- include/internal/routines/level2/xsyr.h 45
-rw-r--r-- include/internal/utilities.h 3
10 files changed, 442 insertions, 9 deletions
diff --git a/include/internal/database.h b/include/internal/database.h
index 08e449fa..ca79fdad 100644
--- a/include/internal/database.h
+++ b/include/internal/database.h
@@ -57,22 +57,20 @@ class Database {
// The OpenCL device vendors
static constexpr auto kDeviceVendorAll = "default";
- static constexpr auto kDeviceVendorIntel = "Intel";
- static constexpr auto kDeviceVendorAMD = "AMD";
- static constexpr auto kDeviceVendorNVIDIA = "NVIDIA";
- // Alternative names for the above vendors
+ // Alternative names for some OpenCL vendors
const std::unordered_map<std::string,std::string> kVendorNames {
- {"Intel(R) Corporation", kDeviceVendorIntel},
- {"GenuineIntel", kDeviceVendorIntel},
- {"Advanced Micro Devices, Inc.", kDeviceVendorAMD},
- {"NVIDIA Corporation", kDeviceVendorNVIDIA},
+ {"Intel(R) Corporation", "Intel"},
+ {"GenuineIntel", "Intel"},
+ {"Advanced Micro Devices, Inc.", "AMD"},
+ {"NVIDIA Corporation", "NVIDIA"},
};
// The database consists of separate database entries, stored together in a vector
static const DatabaseEntry XaxpySingle, XaxpyDouble, XaxpyComplexSingle, XaxpyComplexDouble;
static const DatabaseEntry XdotSingle, XdotDouble, XdotComplexSingle, XdotComplexDouble;
static const DatabaseEntry XgemvSingle, XgemvDouble, XgemvComplexSingle, XgemvComplexDouble;
+ static const DatabaseEntry XgerSingle, XgerDouble, XgerComplexSingle, XgerComplexDouble;
static const DatabaseEntry XgemmSingle, XgemmDouble, XgemmComplexSingle, XgemmComplexDouble;
static const DatabaseEntry CopySingle, CopyDouble, CopyComplexSingle, CopyComplexDouble;
static const DatabaseEntry PadSingle, PadDouble, PadComplexSingle, PadComplexDouble;
diff --git a/include/internal/database/xger.h b/include/internal/database/xger.h
new file mode 100644
index 00000000..c9cfb6cd
--- /dev/null
+++ b/include/internal/database/xger.h
@@ -0,0 +1,88 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Database generator <database.py>
+//
+// This file populates the database with best-found tuning parameters for the 'Xger' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+// =================================================================================================
+
+const Database::DatabaseEntry Database::XgerSingle = { // 'Xger' kernel, single precision
+ "Xger", Precision::kSingle, {
+ { // Intel CPUs: one named device plus a "default" entry carrying the same values
+ kDeviceTypeCPU, "Intel", {
+ { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"WGS1",128}, {"WGS2",2}, {"WPT",4} } },
+ { "default", { {"WGS1",128}, {"WGS2",2}, {"WPT",4} } },
+ }
+ },
+ { // Default: kDeviceTypeAll with vendor "default"; same values as the Intel entries above
+ kDeviceTypeAll, "default", {
+ { "default", { {"WGS1",128}, {"WGS2",2}, {"WPT",4} } },
+ }
+ },
+ }
+};
+
+// =================================================================================================
+
+const Database::DatabaseEntry Database::XgerComplexSingle = { // 'Xger' kernel, complex single
+ "Xger", Precision::kComplexSingle, {
+ { // Intel CPUs: one named device plus a "default" entry carrying the same values
+ kDeviceTypeCPU, "Intel", {
+ { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"WGS1",512}, {"WGS2",8}, {"WPT",2} } },
+ { "default", { {"WGS1",512}, {"WGS2",8}, {"WPT",2} } },
+ }
+ },
+ { // Default: kDeviceTypeAll with vendor "default"; same values as the Intel entries above
+ kDeviceTypeAll, "default", {
+ { "default", { {"WGS1",512}, {"WGS2",8}, {"WPT",2} } },
+ }
+ },
+ }
+};
+
+// =================================================================================================
+
+const Database::DatabaseEntry Database::XgerDouble = { // 'Xger' kernel, double precision
+ "Xger", Precision::kDouble, {
+ { // Intel CPUs: one named device plus a "default" entry carrying the same values
+ kDeviceTypeCPU, "Intel", {
+ { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"WGS1",512}, {"WGS2",16}, {"WPT",1} } },
+ { "default", { {"WGS1",512}, {"WGS2",16}, {"WPT",1} } },
+ }
+ },
+ { // Default: kDeviceTypeAll with vendor "default"; same values as the Intel entries above
+ kDeviceTypeAll, "default", {
+ { "default", { {"WGS1",512}, {"WGS2",16}, {"WPT",1} } },
+ }
+ },
+ }
+};
+
+// =================================================================================================
+
+const Database::DatabaseEntry Database::XgerComplexDouble = { // 'Xger' kernel, complex double
+ "Xger", Precision::kComplexDouble, {
+ { // Intel CPUs: one named device plus a "default" entry carrying the same values
+ kDeviceTypeCPU, "Intel", {
+ { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"WGS1",512}, {"WGS2",1}, {"WPT",1} } },
+ { "default", { {"WGS1",512}, {"WGS2",1}, {"WPT",1} } },
+ }
+ },
+ { // Default: kDeviceTypeAll with vendor "default"; same values as the Intel entries above
+ kDeviceTypeAll, "default", {
+ { "default", { {"WGS1",512}, {"WGS2",1}, {"WPT",1} } },
+ }
+ },
+ }
+};
+
+// =================================================================================================
+} // namespace clblast
diff --git a/include/internal/routines/level2/xger.h b/include/internal/routines/level2/xger.h
new file mode 100644
index 00000000..45ecea10
--- /dev/null
+++ b/include/internal/routines/level2/xger.h
@@ -0,0 +1,58 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements the Xger routine. The precision is implemented using a template argument.
+//
+// =================================================================================================
+
+#ifndef CLBLAST_ROUTINES_XGER_H_
+#define CLBLAST_ROUTINES_XGER_H_
+
+#include "internal/routine.h"
+
+namespace clblast {
+// =================================================================================================
+
+// Xger routine class: derives from Routine<T>; the precision is the template argument T
+template <typename T>
+class Xger: public Routine<T> {
+ public:
+
+ // Names brought into scope from the templated base class Routine<T>
+ using Routine<T>::db_;
+ using Routine<T>::source_string_;
+ using Routine<T>::queue_;
+ using Routine<T>::GetProgramFromCache;
+ using Routine<T>::TestVectorX;
+ using Routine<T>::TestVectorY;
+ using Routine<T>::TestMatrixA;
+ using Routine<T>::RunKernel;
+ using Routine<T>::ErrorIn;
+
+ // Constructor: takes the queue and event by reference; the routine name defaults to "GER"
+ Xger(Queue &queue, Event &event, const std::string &name = "GER");
+
+ // Templated-precision GER entry point; x, y and a are each a buffer plus offset and inc/ld
+ StatusCode DoGer(const Layout layout,
+ const size_t m, const size_t n,
+ const T alpha,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld);
+
+ private:
+ // Static constant holding the precision; only declared here (no initializer in this header)
+ const static Precision precision_;
+};
+
+// =================================================================================================
+} // namespace clblast
+
+// CLBLAST_ROUTINES_XGER_H_
+#endif
diff --git a/include/internal/routines/level2/xgerc.h b/include/internal/routines/level2/xgerc.h
new file mode 100644
index 00000000..8e515a14
--- /dev/null
+++ b/include/internal/routines/level2/xgerc.h
@@ -0,0 +1,46 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements the Xgerc routine. The precision is implemented using a template argument.
+//
+// =================================================================================================
+
+#ifndef CLBLAST_ROUTINES_XGERC_H_
+#define CLBLAST_ROUTINES_XGERC_H_
+
+#include "internal/routines/level2/xger.h"
+
+namespace clblast {
+// =================================================================================================
+
+// Xgerc routine class: derives from Xger<T> and declares DoGerc next to the inherited DoGer
+template <typename T>
+class Xgerc: public Xger<T> {
+ public:
+
+ // Brings the DoGer method of the Xger<T> base class into scope
+ using Xger<T>::DoGer;
+
+ // Constructor: takes the queue and event by reference; the routine name defaults to "GERC"
+ Xgerc(Queue &queue, Event &event, const std::string &name = "GERC");
+
+ // Templated-precision GERC entry point; x, y and a are each a buffer plus offset and inc/ld
+ StatusCode DoGerc(const Layout layout,
+ const size_t m, const size_t n,
+ const T alpha,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld);
+};
+
+// =================================================================================================
+} // namespace clblast
+
+// CLBLAST_ROUTINES_XGERC_H_
+#endif
diff --git a/include/internal/routines/level2/xgeru.h b/include/internal/routines/level2/xgeru.h
new file mode 100644
index 00000000..ec485c37
--- /dev/null
+++ b/include/internal/routines/level2/xgeru.h
@@ -0,0 +1,46 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements the Xgeru routine. The precision is implemented using a template argument.
+//
+// =================================================================================================
+
+#ifndef CLBLAST_ROUTINES_XGERU_H_
+#define CLBLAST_ROUTINES_XGERU_H_
+
+#include "internal/routines/level2/xger.h"
+
+namespace clblast {
+// =================================================================================================
+
+// Xgeru routine class: derives from Xger<T> and declares DoGeru next to the inherited DoGer
+template <typename T>
+class Xgeru: public Xger<T> {
+ public:
+
+ // Brings the DoGer method of the Xger<T> base class into scope
+ using Xger<T>::DoGer;
+
+ // Constructor: takes the queue and event by reference; the routine name defaults to "GERU"
+ Xgeru(Queue &queue, Event &event, const std::string &name = "GERU");
+
+ // Templated-precision GERU entry point; x, y and a are each a buffer plus offset and inc/ld
+ StatusCode DoGeru(const Layout layout,
+ const size_t m, const size_t n,
+ const T alpha,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld);
+};
+
+// =================================================================================================
+} // namespace clblast
+
+// CLBLAST_ROUTINES_XGERU_H_
+#endif
diff --git a/include/internal/routines/level2/xher.h b/include/internal/routines/level2/xher.h
new file mode 100644
index 00000000..6322265b
--- /dev/null
+++ b/include/internal/routines/level2/xher.h
@@ -0,0 +1,61 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements the Xher routine. The precision is implemented using a template argument.
+//
+// =================================================================================================
+
+#ifndef CLBLAST_ROUTINES_XHER_H_
+#define CLBLAST_ROUTINES_XHER_H_
+
+#include "internal/routine.h"
+
+namespace clblast {
+// =================================================================================================
+
+// Xher routine class: derives from Routine<T>; T is the buffer element type, U the type of alpha
+template <typename T, typename U>
+class Xher: public Routine<T> {
+ public:
+
+ // Names brought into scope from the templated base class Routine<T>
+ using Routine<T>::db_;
+ using Routine<T>::source_string_;
+ using Routine<T>::queue_;
+ using Routine<T>::GetProgramFromCache;
+ using Routine<T>::TestVectorX;
+ using Routine<T>::TestMatrixA;
+ using Routine<T>::TestMatrixAP;
+ using Routine<T>::RunKernel;
+ using Routine<T>::ErrorIn;
+
+ // Constructor: takes the queue and event by reference; the routine name defaults to "HER"
+ Xher(Queue &queue, Event &event, const std::string &name = "HER");
+
+ // Translates alpha of type 'U' into a value of type 'T' (declared here, defined elsewhere)
+ T GetAlpha(const U alpha);
+
+ // Templated-precision HER entry point; 'packed' defaults to false (presumably packed storage)
+ StatusCode DoHer(const Layout layout, const Triangle triangle,
+ const size_t n,
+ const U alpha,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
+ const bool packed = false);
+
+ private:
+ // Static constant holding the precision; only declared here (no initializer in this header)
+ const static Precision precision_;
+};
+
+// =================================================================================================
+} // namespace clblast
+
+// CLBLAST_ROUTINES_XHER_H_
+#endif
diff --git a/include/internal/routines/level2/xhpr.h b/include/internal/routines/level2/xhpr.h
new file mode 100644
index 00000000..a0c3cb92
--- /dev/null
+++ b/include/internal/routines/level2/xhpr.h
@@ -0,0 +1,45 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements the Xhpr routine. The precision is implemented using a template argument.
+//
+// =================================================================================================
+
+#ifndef CLBLAST_ROUTINES_XHPR_H_
+#define CLBLAST_ROUTINES_XHPR_H_
+
+#include "internal/routines/level2/xher.h"
+
+namespace clblast {
+// =================================================================================================
+
+// Xhpr routine class: derives from Xher<T,U> and declares DoHpr next to the inherited DoHer
+template <typename T, typename U>
+class Xhpr: public Xher<T,U> {
+ public:
+
+ // Brings the DoHer method of the Xher<T,U> base class into scope
+ using Xher<T,U>::DoHer;
+
+ // Constructor: takes the queue and event by reference; the routine name defaults to "HPR"
+ Xhpr(Queue &queue, Event &event, const std::string &name = "HPR");
+
+ // Templated-precision HPR entry point; the matrix is given as ap_buffer/ap_offset only (no ld)
+ StatusCode DoHpr(const Layout layout, const Triangle triangle,
+ const size_t n,
+ const U alpha,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T> &ap_buffer, const size_t ap_offset);
+};
+
+// =================================================================================================
+} // namespace clblast
+
+// CLBLAST_ROUTINES_XHPR_H_
+#endif
diff --git a/include/internal/routines/level2/xspr.h b/include/internal/routines/level2/xspr.h
new file mode 100644
index 00000000..5b01d2cb
--- /dev/null
+++ b/include/internal/routines/level2/xspr.h
@@ -0,0 +1,45 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements the Xspr routine. The precision is implemented using a template argument.
+//
+// =================================================================================================
+
+#ifndef CLBLAST_ROUTINES_XSPR_H_
+#define CLBLAST_ROUTINES_XSPR_H_
+
+#include "internal/routines/level2/xher.h"
+
+namespace clblast {
+// =================================================================================================
+
+// Xspr routine class: derives from Xher<T,T> (alpha has the element type T) and declares DoSpr
+template <typename T>
+class Xspr: public Xher<T,T> {
+ public:
+
+ // Brings the DoHer method of the Xher<T,T> base class into scope
+ using Xher<T,T>::DoHer;
+
+ // Constructor: takes the queue and event by reference; the routine name defaults to "SPR"
+ Xspr(Queue &queue, Event &event, const std::string &name = "SPR");
+
+ // Templated-precision SPR entry point; the matrix is given as ap_buffer/ap_offset only (no ld)
+ StatusCode DoSpr(const Layout layout, const Triangle triangle,
+ const size_t n,
+ const T alpha,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T> &ap_buffer, const size_t ap_offset);
+};
+
+// =================================================================================================
+} // namespace clblast
+
+// CLBLAST_ROUTINES_XSPR_H_
+#endif
diff --git a/include/internal/routines/level2/xsyr.h b/include/internal/routines/level2/xsyr.h
new file mode 100644
index 00000000..9704a881
--- /dev/null
+++ b/include/internal/routines/level2/xsyr.h
@@ -0,0 +1,45 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements the Xsyr routine. The precision is implemented using a template argument.
+//
+// =================================================================================================
+
+#ifndef CLBLAST_ROUTINES_XSYR_H_
+#define CLBLAST_ROUTINES_XSYR_H_
+
+#include "internal/routines/level2/xher.h"
+
+namespace clblast {
+// =================================================================================================
+
+// Xsyr routine class: derives from Xher<T,T> (alpha has the element type T) and declares DoSyr
+template <typename T>
+class Xsyr: public Xher<T,T> {
+ public:
+
+ // Brings the DoHer method of the Xher<T,T> base class into scope
+ using Xher<T,T>::DoHer;
+
+ // Constructor: takes the queue and event by reference; the routine name defaults to "SYR"
+ Xsyr(Queue &queue, Event &event, const std::string &name = "SYR");
+
+ // Templated-precision SYR entry point; the matrix is given as a_buffer, a_offset and a_ld
+ StatusCode DoSyr(const Layout layout, const Triangle triangle,
+ const size_t n,
+ const T alpha,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld);
+};
+
+// =================================================================================================
+} // namespace clblast
+
+// CLBLAST_ROUTINES_XSYR_H_
+#endif
diff --git a/include/internal/utilities.h b/include/internal/utilities.h
index ed17271f..b6307a85 100644
--- a/include/internal/utilities.h
+++ b/include/internal/utilities.h
@@ -171,7 +171,8 @@ T GetArgument(const int argc, char *argv[], std::string &help,
const std::string &option, const T default_value);
// Returns the precision only
-Precision GetPrecision(const int argc, char *argv[]);
+Precision GetPrecision(const int argc, char *argv[],
+ const Precision default_precision = Precision::kSingle);
// As in "GetArgument", but now only checks whether an argument is given or not
bool CheckArgument(const int argc, char *argv[], std::string &help, const std::string &option);