diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2016-06-16 18:07:46 +0200 |
---|---|---|
committer | Cedric Nugteren <web@cedricnugteren.nl> | 2016-06-16 18:07:46 +0200 |
commit | 52ccaf5b25e14c9ce032315e5e96b1f27886d481 (patch) | |
tree | 087288b7aebf2a06ffc4e7dcbcd4353f7a3be6a7 /include | |
parent | 39b7dbc5e37829abfbcfb77852b9138b31540b42 (diff) |
Added XOMATCOPY routines to perform out-of-place matrix scaling, copying, and/or transposing
Diffstat (limited to 'include')
-rw-r--r-- | include/clblast.h | 13 | ||||
-rw-r--r-- | include/clblast_c.h | 36 | ||||
-rw-r--r-- | include/internal/routine.h | 1 | ||||
-rw-r--r-- | include/internal/routines/levelx/xomatcopy.h | 54 |
4 files changed, 104 insertions, 0 deletions
diff --git a/include/clblast.h b/include/clblast.h
index 64b2610a..31a07423 100644
--- a/include/clblast.h
+++ b/include/clblast.h
@@ -560,6 +560,19 @@ StatusCode Trsm(const Layout layout, const Side side, const Triangle triangle, c
                 cl_command_queue* queue, cl_event* event = nullptr);
 
 // =================================================================================================
+// Extra non-BLAS routines (level-X)
+// =================================================================================================
+
+// Scaling and out-place transpose/copy (non-BLAS function): SOMATCOPY/DOMATCOPY/COMATCOPY/ZOMATCOPY/HOMATCOPY
+template <typename T>
+StatusCode Omatcopy(const Layout layout, const Transpose a_transpose,
+                    const size_t m, const size_t n,
+                    const T alpha,
+                    const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+                    cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+                    cl_command_queue* queue, cl_event* event = nullptr);
+
+// =================================================================================================
 
 // CLBlast stores binaries of compiled kernels into a cache in case the same kernel is used later on
 // for the same device. This cache can be cleared to free up system memory or in case of debugging.
diff --git a/include/clblast_c.h b/include/clblast_c.h
index 40248615..3ac6d99c 100644
--- a/include/clblast_c.h
+++ b/include/clblast_c.h
@@ -1266,6 +1266,42 @@ StatusCode PUBLIC_API CLBlastHtrsm(const Layout layout, const Side side, const T
                                    cl_command_queue* queue, cl_event* event);
 
 // =================================================================================================
+// Extra non-BLAS routines (level-X)
+// =================================================================================================
+
+// Scaling and out-place transpose/copy (non-BLAS function): SOMATCOPY/DOMATCOPY/COMATCOPY/ZOMATCOPY/HOMATCOPY
+StatusCode PUBLIC_API CLBlastSomatcopy(const Layout layout, const Transpose a_transpose,
+                                       const size_t m, const size_t n,
+                                       const float alpha,
+                                       const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+                                       cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+                                       cl_command_queue* queue, cl_event* event);
+StatusCode PUBLIC_API CLBlastDomatcopy(const Layout layout, const Transpose a_transpose,
+                                       const size_t m, const size_t n,
+                                       const double alpha,
+                                       const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+                                       cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+                                       cl_command_queue* queue, cl_event* event);
+StatusCode PUBLIC_API CLBlastComatcopy(const Layout layout, const Transpose a_transpose,
+                                       const size_t m, const size_t n,
+                                       const cl_float2 alpha,
+                                       const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+                                       cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+                                       cl_command_queue* queue, cl_event* event);
+StatusCode PUBLIC_API CLBlastZomatcopy(const Layout layout, const Transpose a_transpose,
+                                       const size_t m, const size_t n,
+                                       const cl_double2 alpha,
+                                       const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+                                       cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+                                       cl_command_queue* queue, cl_event* event);
+StatusCode PUBLIC_API CLBlastHomatcopy(const Layout layout, const Transpose a_transpose,
+                                       const size_t m, const size_t n,
+                                       const cl_half alpha,
+                                       const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+                                       cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+                                       cl_command_queue* queue, cl_event* event);
+
+// =================================================================================================
 
 // CLBlast stores binaries of compiled kernels into a cache in case the same kernel is used later on
 // for the same device. This cache can be cleared to free up system memory or in case of debugging.
diff --git a/include/internal/routine.h b/include/internal/routine.h
index d420e2db..35837575 100644
--- a/include/internal/routine.h
+++ b/include/internal/routine.h
@@ -83,6 +83,7 @@ class Routine {
                                     const size_t dest_one, const size_t dest_two,
                                     const size_t dest_ld, const size_t dest_offset,
                                     const Buffer<T> &dest,
+                                    const T alpha,
                                     const Program &program, const bool do_pad,
                                     const bool do_transpose, const bool do_conjugate,
                                     const bool upper = false, const bool lower = false,
diff --git a/include/internal/routines/levelx/xomatcopy.h b/include/internal/routines/levelx/xomatcopy.h
new file mode 100644
index 00000000..38df846e
--- /dev/null
+++ b/include/internal/routines/levelx/xomatcopy.h
@@ -0,0 +1,54 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+//   Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements the Xomatcopy routine. The precision is implemented using a template argument.
+//
+// =================================================================================================
+
+#ifndef CLBLAST_ROUTINES_XOMATCOPY_H_
+#define CLBLAST_ROUTINES_XOMATCOPY_H_
+
+#include "internal/routine.h"
+
+namespace clblast {
+// =================================================================================================
+
+// See comment at top of file for a description of the class
+template <typename T>
+class Xomatcopy: public Routine<T> {
+ public:
+
+  // Members and methods from the base class
+  using Routine<T>::source_string_;
+  using Routine<T>::event_;
+  using Routine<T>::GetProgramFromCache;
+  using Routine<T>::PadCopyTransposeMatrix;
+  using Routine<T>::TestMatrixA;
+  using Routine<T>::TestMatrixB;
+  using Routine<T>::ErrorIn;
+
+  // Constructor
+  Xomatcopy(Queue &queue, EventPointer event, const std::string &name = "OMATCOPY");
+
+  // Templated-precision implementation of the routine
+  StatusCode DoOmatcopy(const Layout layout, const Transpose a_transpose,
+                        const size_t m, const size_t n, const T alpha,
+                        const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
+                        const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld);
+
+ private:
+  // Static variable to get the precision
+  const static Precision precision_;
+};
+
+// =================================================================================================
+} // namespace clblast
+
+// CLBLAST_ROUTINES_XOMATCOPY_H_
+#endif