diff options
Diffstat (limited to 'include')
-rw-r--r-- | include/clblast.h | 26 | ||||
-rw-r--r-- | include/clblast_c.h | 38 | ||||
-rw-r--r-- | include/internal/database.h | 1 | ||||
-rw-r--r-- | include/internal/database/xdot.h | 113 | ||||
-rw-r--r-- | include/internal/routines/level1/xdot.h | 58 | ||||
-rw-r--r-- | include/internal/routines/level1/xdotc.h | 44 | ||||
-rw-r--r-- | include/internal/routines/level1/xdotu.h | 44 |
7 files changed, 324 insertions, 0 deletions
diff --git a/include/clblast.h b/include/clblast.h index 326c9ec2..72825e0b 100644 --- a/include/clblast.h +++ b/include/clblast.h @@ -68,6 +68,8 @@ enum class StatusCode { kInvalidLocalMemUsage = -2046, // Not enough local memory available on this device kNoHalfPrecision = -2045, // Half precision (16-bits) not supported by the device kNoDoublePrecision = -2044, // Double precision (64-bits) not supported by the device + kInvalidVectorDot = -2043, // Vector dot is not a valid OpenCL buffer + kInsufficientMemoryDot = -2042, // Vector dot's OpenCL buffer is too small }; // Matrix layout and transpose types @@ -114,6 +116,30 @@ StatusCode Axpy(const size_t n, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_command_queue* queue, cl_event* event); +// Dot product of two vectors: SDOT/DDOT +template <typename T> +StatusCode Dot(const size_t n, + cl_mem dot_buffer, const size_t dot_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_command_queue* queue, cl_event* event); + +// Dot product of two complex vectors: CDOTU/ZDOTU +template <typename T> +StatusCode Dotu(const size_t n, + cl_mem dot_buffer, const size_t dot_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_command_queue* queue, cl_event* event); + +// Dot product of two complex vectors, one conjugated: CDOTC/ZDOTC +template <typename T> +StatusCode Dotc(const size_t n, + cl_mem dot_buffer, const size_t dot_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_command_queue* queue, cl_event* event); + // ================================================================================================= // BLAS level-2 (matrix-vector) routines // ================================================================================================= diff --git a/include/clblast_c.h b/include/clblast_c.h index 766570e6..88754990 100644 --- a/include/clblast_c.h +++ b/include/clblast_c.h @@ -64,6 +64,8 @@ typedef enum StatusCode_ { kInvalidLocalMemUsage = -2046, // Not enough local memory available on this device kNoHalfPrecision = -2045, // Half precision (16-bits) not supported by the device kNoDoublePrecision = -2044, // Double precision (64-bits) not supported by the device + kInvalidVectorDot = -2043, // Vector dot is not a valid OpenCL buffer + kInsufficientMemoryDot = -2042, // Vector dot's OpenCL buffer is too small } StatusCode; // Matrix layout and transpose types @@ -157,6 +159,42 @@ StatusCode CLBlastZaxpy(const size_t n, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_command_queue* queue, cl_event* event); +// Dot product of two vectors: SDOT/DDOT +StatusCode CLBlastSdot(const size_t n, + cl_mem dot_buffer, const size_t dot_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_command_queue* queue, cl_event* event); +StatusCode CLBlastDdot(const size_t n, + cl_mem dot_buffer, const size_t dot_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_command_queue* queue, cl_event* event); + +// Dot product of two complex vectors: CDOTU/ZDOTU +StatusCode CLBlastCdotu(const size_t n, + cl_mem dot_buffer, const size_t dot_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_command_queue* queue, cl_event* event); +StatusCode CLBlastZdotu(const size_t n, + cl_mem dot_buffer, const size_t dot_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_command_queue* queue, cl_event* event); + +// Dot product of two complex vectors, one conjugated: CDOTC/ZDOTC +StatusCode CLBlastCdotc(const size_t n, + cl_mem dot_buffer, const size_t dot_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_command_queue* queue, cl_event* event); +StatusCode CLBlastZdotc(const size_t n, + cl_mem dot_buffer, const size_t dot_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_command_queue* queue, cl_event* event); + // ================================================================================================= // BLAS level-2 (matrix-vector) routines // ================================================================================================= diff --git a/include/internal/database.h b/include/internal/database.h index 8c937e34..1ac0e646 100644 --- a/include/internal/database.h +++ b/include/internal/database.h @@ -67,6 +67,7 @@ class Database { // The database consists of separate database entries, stored together in a vector static const DatabaseEntry XaxpySingle, XaxpyDouble, XaxpyComplexSingle, XaxpyComplexDouble; + static const DatabaseEntry XdotSingle, XdotDouble, XdotComplexSingle, XdotComplexDouble; static const DatabaseEntry XgemvSingle, XgemvDouble, XgemvComplexSingle, XgemvComplexDouble; static const DatabaseEntry XgemmSingle, XgemmDouble, XgemmComplexSingle, XgemmComplexDouble; static const DatabaseEntry CopySingle, CopyDouble, CopyComplexSingle, CopyComplexDouble; diff --git a/include/internal/database/xdot.h b/include/internal/database/xdot.h new file mode 100644 index 00000000..05841eb7 --- /dev/null +++ b/include/internal/database/xdot.h @@ -0,0 +1,113 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file populates the database with best-found tuning parameters for the Xdot kernels. +// +// ================================================================================================= + +namespace clblast { +// ================================================================================================= + +const Database::DatabaseEntry Database::XdotSingle = { + "Xdot", Precision::kSingle, { + { // NVIDIA GPUs + kDeviceTypeGPU, kDeviceVendorNVIDIA, { + } + }, + { // AMD GPUs + kDeviceTypeGPU, kDeviceVendorAMD, { + } + }, + { // Intel GPUs + kDeviceTypeGPU, kDeviceVendorIntel, { + { "Iris", { {"WGS1",512}, {"WGS2",512} } }, + } + }, + { // Default + kDeviceTypeAll, kDeviceVendorAll, { + { kDefaultDevice, { {"WGS1",64}, {"WGS2",64} } }, + } + }, + } +}; + +// ================================================================================================= + +const Database::DatabaseEntry Database::XdotDouble = { + "Xdot", Precision::kDouble, { + { // NVIDIA GPUs + kDeviceTypeGPU, kDeviceVendorNVIDIA, { + } + }, + { // AMD GPUs + kDeviceTypeGPU, kDeviceVendorAMD, { + } + }, + { // Intel GPUs + kDeviceTypeGPU, kDeviceVendorIntel, { + } + }, + { // Default + kDeviceTypeAll, kDeviceVendorAll, { + { kDefaultDevice, { {"WGS1",64}, {"WGS2",64} } }, + } + }, + } +}; +// ================================================================================================= + +const Database::DatabaseEntry Database::XdotComplexSingle = { + "Xdot", Precision::kComplexSingle, { + { // NVIDIA GPUs + kDeviceTypeGPU, kDeviceVendorNVIDIA, { + } + }, + { // AMD GPUs + kDeviceTypeGPU, kDeviceVendorAMD, { + } + }, + { // Intel GPUs + kDeviceTypeGPU, kDeviceVendorIntel, { + { "Iris", { {"WGS1",512}, {"WGS2",512} } }, + } + }, + { // Default + kDeviceTypeAll, kDeviceVendorAll, { + { kDefaultDevice, { {"WGS1",64}, {"WGS2",64} } }, + } + }, + } +}; + +// ================================================================================================= + +const Database::DatabaseEntry Database::XdotComplexDouble = { + "Xdot", Precision::kComplexDouble, { + { // NVIDIA GPUs + kDeviceTypeGPU, kDeviceVendorNVIDIA, { + } + }, + { // AMD GPUs + kDeviceTypeGPU, kDeviceVendorAMD, { + } + }, + { // Intel GPUs + kDeviceTypeGPU, kDeviceVendorIntel, { + } + }, + { // Default + kDeviceTypeAll, kDeviceVendorAll, { + { kDefaultDevice, { {"WGS1",64}, {"WGS2",64} } }, + } + }, + } +}; + +// ================================================================================================= +} // namespace clblast diff --git a/include/internal/routines/level1/xdot.h b/include/internal/routines/level1/xdot.h new file mode 100644 index 00000000..64b62945 --- /dev/null +++ b/include/internal/routines/level1/xdot.h @@ -0,0 +1,58 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xdot routine. The precision is implemented using a template argument. +// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XDOT_H_ +#define CLBLAST_ROUTINES_XDOT_H_ + +#include "internal/routine.h" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template <typename T> +class Xdot: public Routine<T> { + public: + + // Members and methods from the base class + using Routine<T>::db_; + using Routine<T>::source_string_; + using Routine<T>::queue_; + using Routine<T>::context_; + using Routine<T>::GetProgramFromCache; + using Routine<T>::TestVectorX; + using Routine<T>::TestVectorY; + using Routine<T>::TestVectorDot; + using Routine<T>::RunKernel; + using Routine<T>::ErrorIn; + + // Constructor + Xdot(Queue &queue, Event &event, const std::string &name = "DOT"); + + // Templated-precision implementation of the routine + StatusCode DoDot(const size_t n, + const Buffer<T> &dot_buffer, const size_t dot_offset, + const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc, + const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc, + const bool do_conjugate = false); + + private: + // Static variable to get the precision + const static Precision precision_; +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XDOT_H_ +#endif diff --git a/include/internal/routines/level1/xdotc.h b/include/internal/routines/level1/xdotc.h new file mode 100644 index 00000000..726cec7c --- /dev/null +++ b/include/internal/routines/level1/xdotc.h @@ -0,0 +1,44 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xdotc routine. The precision is implemented using a template argument. +// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XDOTC_H_ +#define CLBLAST_ROUTINES_XDOTC_H_ + +#include "internal/routines/level1/xdot.h" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template <typename T> +class Xdotc: public Xdot<T> { + public: + + // Uses the regular Xdot routine + using Xdot<T>::DoDot; + + // Constructor + Xdotc(Queue &queue, Event &event, const std::string &name = "DOTC"); + + // Templated-precision implementation of the routine + StatusCode DoDotc(const size_t n, + const Buffer<T> &dot_buffer, const size_t dot_offset, + const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc, + const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc); +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XDOTC_H_ +#endif diff --git a/include/internal/routines/level1/xdotu.h b/include/internal/routines/level1/xdotu.h new file mode 100644 index 00000000..825ebb78 --- /dev/null +++ b/include/internal/routines/level1/xdotu.h @@ -0,0 +1,44 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xdotu routine. The precision is implemented using a template argument. +// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XDOTU_H_ +#define CLBLAST_ROUTINES_XDOTU_H_ + +#include "internal/routines/level1/xdot.h" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template <typename T> +class Xdotu: public Xdot<T> { + public: + + // Uses the regular Xdot routine + using Xdot<T>::DoDot; + + // Constructor + Xdotu(Queue &queue, Event &event, const std::string &name = "DOTU"); + + // Templated-precision implementation of the routine + StatusCode DoDotu(const size_t n, + const Buffer<T> &dot_buffer, const size_t dot_offset, + const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc, + const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc); +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XDOTU_H_ +#endif |