summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
author CNugteren <web@cedricnugteren.nl> 2015-09-14 16:57:00 +0200
committer CNugteren <web@cedricnugteren.nl> 2015-09-14 16:57:00 +0200
commit a2e726d3bd4294f1eae1735f6ba23105dccc6b10 (patch)
tree 2685c3204460a300088b13af66128cdda8e64c68 /include
parent 2a383f34501b386b8e6c4beb56c6ac694622f060 (diff)
Added xDOT/xDOTU/xDOTC dot-product routines
Diffstat (limited to 'include')
-rw-r--r-- include/clblast.h 26
-rw-r--r-- include/clblast_c.h 38
-rw-r--r-- include/internal/database.h 1
-rw-r--r-- include/internal/database/xdot.h 113
-rw-r--r-- include/internal/routines/level1/xdot.h 58
-rw-r--r-- include/internal/routines/level1/xdotc.h 44
-rw-r--r-- include/internal/routines/level1/xdotu.h 44
7 files changed, 324 insertions, 0 deletions
diff --git a/include/clblast.h b/include/clblast.h
index 326c9ec2..72825e0b 100644
--- a/include/clblast.h
+++ b/include/clblast.h
@@ -68,6 +68,8 @@ enum class StatusCode {
kInvalidLocalMemUsage = -2046, // Not enough local memory available on this device
kNoHalfPrecision = -2045, // Half precision (16-bits) not supported by the device
kNoDoublePrecision = -2044, // Double precision (64-bits) not supported by the device
+ kInvalidVectorDot = -2043, // Vector dot is not a valid OpenCL buffer
+ kInsufficientMemoryDot = -2042, // Vector dot's OpenCL buffer is too small
};
// Matrix layout and transpose types
@@ -114,6 +116,30 @@ StatusCode Axpy(const size_t n,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event);
+// Dot product of two vectors: SDOT/DDOT (one template, instantiated per precision T)
+template <typename T>
+StatusCode Dot(const size_t n,
+ cl_mem dot_buffer, const size_t dot_offset, // only non-const buffer; presumably the result
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, // first read-only vector
+ const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, // second read-only vector
+ cl_command_queue* queue, cl_event* event);
+
+// Dot product of two complex vectors: CDOTU/ZDOTU (same parameter list as Dot)
+template <typename T>
+StatusCode Dotu(const size_t n,
+ cl_mem dot_buffer, const size_t dot_offset, // only non-const buffer; presumably the result
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, // first read-only vector
+ const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, // second read-only vector
+ cl_command_queue* queue, cl_event* event);
+
+// Dot product of two complex vectors, one conjugated: CDOTC/ZDOTC (x in reference BLAS; confirm)
+template <typename T>
+StatusCode Dotc(const size_t n,
+ cl_mem dot_buffer, const size_t dot_offset, // only non-const buffer; presumably the result
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, // first read-only vector
+ const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, // second read-only vector
+ cl_command_queue* queue, cl_event* event);
+
// =================================================================================================
// BLAS level-2 (matrix-vector) routines
// =================================================================================================
diff --git a/include/clblast_c.h b/include/clblast_c.h
index 766570e6..88754990 100644
--- a/include/clblast_c.h
+++ b/include/clblast_c.h
@@ -64,6 +64,8 @@ typedef enum StatusCode_ {
kInvalidLocalMemUsage = -2046, // Not enough local memory available on this device
kNoHalfPrecision = -2045, // Half precision (16-bits) not supported by the device
kNoDoublePrecision = -2044, // Double precision (64-bits) not supported by the device
+ kInvalidVectorDot = -2043, // Vector dot is not a valid OpenCL buffer
+ kInsufficientMemoryDot = -2042, // Vector dot's OpenCL buffer is too small
} StatusCode;
// Matrix layout and transpose types
@@ -157,6 +159,42 @@ StatusCode CLBlastZaxpy(const size_t n,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event);
+// Dot product of two vectors: SDOT/DDOT (C entry points; both share one parameter list)
+StatusCode CLBlastSdot(const size_t n,
+ cl_mem dot_buffer, const size_t dot_offset, // only non-const buffer; presumably the result
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_command_queue* queue, cl_event* event);
+StatusCode CLBlastDdot(const size_t n,
+ cl_mem dot_buffer, const size_t dot_offset, // only non-const buffer; presumably the result
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_command_queue* queue, cl_event* event);
+
+// Dot product of two complex vectors: CDOTU/ZDOTU (C entry points; same parameters as xDOT)
+StatusCode CLBlastCdotu(const size_t n,
+ cl_mem dot_buffer, const size_t dot_offset, // only non-const buffer; presumably the result
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_command_queue* queue, cl_event* event);
+StatusCode CLBlastZdotu(const size_t n,
+ cl_mem dot_buffer, const size_t dot_offset, // only non-const buffer; presumably the result
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_command_queue* queue, cl_event* event);
+
+// Dot product of two complex vectors, one conjugated: CDOTC/ZDOTC (x in reference BLAS; confirm)
+StatusCode CLBlastCdotc(const size_t n,
+ cl_mem dot_buffer, const size_t dot_offset, // only non-const buffer; presumably the result
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_command_queue* queue, cl_event* event);
+StatusCode CLBlastZdotc(const size_t n,
+ cl_mem dot_buffer, const size_t dot_offset, // only non-const buffer; presumably the result
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_command_queue* queue, cl_event* event);
+
// =================================================================================================
// BLAS level-2 (matrix-vector) routines
// =================================================================================================
diff --git a/include/internal/database.h b/include/internal/database.h
index 8c937e34..1ac0e646 100644
--- a/include/internal/database.h
+++ b/include/internal/database.h
@@ -67,6 +67,7 @@ class Database {
// The database consists of separate database entries, stored together in a vector
static const DatabaseEntry XaxpySingle, XaxpyDouble, XaxpyComplexSingle, XaxpyComplexDouble;
+ static const DatabaseEntry XdotSingle, XdotDouble, XdotComplexSingle, XdotComplexDouble;
static const DatabaseEntry XgemvSingle, XgemvDouble, XgemvComplexSingle, XgemvComplexDouble;
static const DatabaseEntry XgemmSingle, XgemmDouble, XgemmComplexSingle, XgemmComplexDouble;
static const DatabaseEntry CopySingle, CopyDouble, CopyComplexSingle, CopyComplexDouble;
diff --git a/include/internal/database/xdot.h b/include/internal/database/xdot.h
new file mode 100644
index 00000000..05841eb7
--- /dev/null
+++ b/include/internal/database/xdot.h
@@ -0,0 +1,113 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file populates the database with best-found tuning parameters for the Xdot kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+// =================================================================================================
+
+const Database::DatabaseEntry Database::XdotSingle = { // "Xdot" parameters, single precision
+ "Xdot", Precision::kSingle, {
+ { // NVIDIA GPUs (device list is empty)
+ kDeviceTypeGPU, kDeviceVendorNVIDIA, {
+ }
+ },
+ { // AMD GPUs (device list is empty)
+ kDeviceTypeGPU, kDeviceVendorAMD, {
+ }
+ },
+ { // Intel GPUs (one device entry: "Iris")
+ kDeviceTypeGPU, kDeviceVendorIntel, {
+ { "Iris", { {"WGS1",512}, {"WGS2",512} } }, // WGS*: presumably work-group sizes - confirm
+ }
+ },
+ { // Default: keyed on kDefaultDevice instead of a device name
+ kDeviceTypeAll, kDeviceVendorAll, {
+ { kDefaultDevice, { {"WGS1",64}, {"WGS2",64} } },
+ }
+ },
+ }
+};
+
+// =================================================================================================
+
+const Database::DatabaseEntry Database::XdotDouble = { // "Xdot" parameters, double precision
+ "Xdot", Precision::kDouble, {
+ { // NVIDIA GPUs (device list is empty)
+ kDeviceTypeGPU, kDeviceVendorNVIDIA, {
+ }
+ },
+ { // AMD GPUs (device list is empty)
+ kDeviceTypeGPU, kDeviceVendorAMD, {
+ }
+ },
+ { // Intel GPUs (device list is empty, unlike the single-precision entry)
+ kDeviceTypeGPU, kDeviceVendorIntel, {
+ }
+ },
+ { // Default: keyed on kDefaultDevice instead of a device name
+ kDeviceTypeAll, kDeviceVendorAll, {
+ { kDefaultDevice, { {"WGS1",64}, {"WGS2",64} } }, // WGS*: presumably work-group sizes
+ }
+ },
+ }
+};
+// =================================================================================================
+
+const Database::DatabaseEntry Database::XdotComplexSingle = { // "Xdot", complex single precision
+ "Xdot", Precision::kComplexSingle, {
+ { // NVIDIA GPUs (device list is empty)
+ kDeviceTypeGPU, kDeviceVendorNVIDIA, {
+ }
+ },
+ { // AMD GPUs (device list is empty)
+ kDeviceTypeGPU, kDeviceVendorAMD, {
+ }
+ },
+ { // Intel GPUs (one device entry: "Iris")
+ kDeviceTypeGPU, kDeviceVendorIntel, {
+ { "Iris", { {"WGS1",512}, {"WGS2",512} } }, // WGS*: presumably work-group sizes - confirm
+ }
+ },
+ { // Default: keyed on kDefaultDevice instead of a device name
+ kDeviceTypeAll, kDeviceVendorAll, {
+ { kDefaultDevice, { {"WGS1",64}, {"WGS2",64} } },
+ }
+ },
+ }
+};
+
+// =================================================================================================
+
+const Database::DatabaseEntry Database::XdotComplexDouble = { // "Xdot", complex double precision
+ "Xdot", Precision::kComplexDouble, {
+ { // NVIDIA GPUs (device list is empty)
+ kDeviceTypeGPU, kDeviceVendorNVIDIA, {
+ }
+ },
+ { // AMD GPUs (device list is empty)
+ kDeviceTypeGPU, kDeviceVendorAMD, {
+ }
+ },
+ { // Intel GPUs (device list is empty, unlike the complex single-precision entry)
+ kDeviceTypeGPU, kDeviceVendorIntel, {
+ }
+ },
+ { // Default: keyed on kDefaultDevice instead of a device name
+ kDeviceTypeAll, kDeviceVendorAll, {
+ { kDefaultDevice, { {"WGS1",64}, {"WGS2",64} } }, // WGS*: presumably work-group sizes
+ }
+ },
+ }
+};
+
+// =================================================================================================
+} // namespace clblast
diff --git a/include/internal/routines/level1/xdot.h b/include/internal/routines/level1/xdot.h
new file mode 100644
index 00000000..64b62945
--- /dev/null
+++ b/include/internal/routines/level1/xdot.h
@@ -0,0 +1,58 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements the Xdot routine. The precision is implemented using a template argument.
+//
+// =================================================================================================
+
+#ifndef CLBLAST_ROUTINES_XDOT_H_
+#define CLBLAST_ROUTINES_XDOT_H_
+
+#include "internal/routine.h"
+
+namespace clblast {
+// =================================================================================================
+
+// Dot-product routine class, templated on the element type T and derived from Routine<T>
+template <typename T>
+class Xdot: public Routine<T> {
+ public:
+
+ // Names from the dependent base Routine<T>; declared under public:, so they are public here
+ using Routine<T>::db_;
+ using Routine<T>::source_string_;
+ using Routine<T>::queue_;
+ using Routine<T>::context_;
+ using Routine<T>::GetProgramFromCache;
+ using Routine<T>::TestVectorX;
+ using Routine<T>::TestVectorY;
+ using Routine<T>::TestVectorDot;
+ using Routine<T>::RunKernel;
+ using Routine<T>::ErrorIn;
+
+ // Constructor; the routine name defaults to "DOT" when the caller does not supply one
+ Xdot(Queue &queue, Event &event, const std::string &name = "DOT");
+
+ // Templated-precision implementation of the routine; all three buffers are taken by const ref
+ StatusCode DoDot(const size_t n,
+ const Buffer<T> &dot_buffer, const size_t dot_offset,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
+ const bool do_conjugate = false); // off by default; presumably set for DOTC - TODO confirm
+
+ private:
+ // Static variable to get the precision
+ const static Precision precision_; // declaration only; the definition is not in this header
+};
+
+// =================================================================================================
+} // namespace clblast
+
+// CLBLAST_ROUTINES_XDOT_H_
+#endif
diff --git a/include/internal/routines/level1/xdotc.h b/include/internal/routines/level1/xdotc.h
new file mode 100644
index 00000000..726cec7c
--- /dev/null
+++ b/include/internal/routines/level1/xdotc.h
@@ -0,0 +1,44 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements the Xdotc routine. The precision is implemented using a template argument.
+//
+// =================================================================================================
+
+#ifndef CLBLAST_ROUTINES_XDOTC_H_
+#define CLBLAST_ROUTINES_XDOTC_H_
+
+#include "internal/routines/level1/xdot.h"
+
+namespace clblast {
+// =================================================================================================
+
+// Routine class for the conjugated dot product (DOTC); derives from Xdot<T>
+template <typename T>
+class Xdotc: public Xdot<T> {
+ public:
+
+ // Uses the regular Xdot routine: makes the inherited DoDot usable by unqualified name
+ using Xdot<T>::DoDot;
+
+ // Constructor; the routine name defaults to "DOTC" when the caller does not supply one
+ Xdotc(Queue &queue, Event &event, const std::string &name = "DOTC");
+
+ // Templated-precision implementation; takes DoDot's arguments minus the do_conjugate flag
+ StatusCode DoDotc(const size_t n,
+ const Buffer<T> &dot_buffer, const size_t dot_offset,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc); // see NOTE below
+}; // NOTE(review): DoDotc presumably forwards to DoDot with do_conjugate = true - confirm
+
+// =================================================================================================
+} // namespace clblast
+
+// CLBLAST_ROUTINES_XDOTC_H_
+#endif
diff --git a/include/internal/routines/level1/xdotu.h b/include/internal/routines/level1/xdotu.h
new file mode 100644
index 00000000..825ebb78
--- /dev/null
+++ b/include/internal/routines/level1/xdotu.h
@@ -0,0 +1,44 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements the Xdotu routine. The precision is implemented using a template argument.
+//
+// =================================================================================================
+
+#ifndef CLBLAST_ROUTINES_XDOTU_H_
+#define CLBLAST_ROUTINES_XDOTU_H_
+
+#include "internal/routines/level1/xdot.h"
+
+namespace clblast {
+// =================================================================================================
+
+// Routine class for the DOTU dot product of complex vectors; derives from Xdot<T>
+template <typename T>
+class Xdotu: public Xdot<T> {
+ public:
+
+ // Uses the regular Xdot routine: makes the inherited DoDot usable by unqualified name
+ using Xdot<T>::DoDot;
+
+ // Constructor; the routine name defaults to "DOTU" when the caller does not supply one
+ Xdotu(Queue &queue, Event &event, const std::string &name = "DOTU");
+
+ // Templated-precision implementation; takes DoDot's arguments minus the do_conjugate flag
+ StatusCode DoDotu(const size_t n,
+ const Buffer<T> &dot_buffer, const size_t dot_offset,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc); // see NOTE below
+}; // NOTE(review): DoDotu presumably forwards to DoDot with do_conjugate = false - confirm
+
+// =================================================================================================
+} // namespace clblast
+
+// CLBLAST_ROUTINES_XDOTU_H_
+#endif