diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2018-03-10 14:35:11 +0100 |
---|---|---|
committer | Cedric Nugteren <web@cedricnugteren.nl> | 2018-03-10 14:35:11 +0100 |
commit | 3d2ef9331bd01bfc31d6725576c8ccb4eadbbc1c (patch) | |
tree | 8835c9dc1f02c58e8a9485a94d27fd2af123dc82 /include/clblast.h | |
parent | 0bdc51e47c97ef8c810d89cc8b2307bea4cbe852 (diff) |
Fixed a few things for the new tuning API
Diffstat (limited to 'include/clblast.h')
-rw-r--r-- | include/clblast.h | 11 |
1 files changed, 11 insertions, 0 deletions
diff --git a/include/clblast.h b/include/clblast.h index 34efc746..5129022e 100644 --- a/include/clblast.h +++ b/include/clblast.h @@ -705,46 +705,57 @@ StatusCode PUBLIC_API OverrideParameters(const cl_device_id device, const std::s // ================================================================================================= +// Tunes the "Xaxpy" kernel, used for many level-1 routines such as XAXPY, XCOPY, and XSWAP template <typename T> StatusCode PUBLIC_API TuneXaxpy(cl_command_queue* queue, const size_t n, const double fraction, std::unordered_map<std::string,size_t> &parameters); +// Tunes the "Xdot" kernel, used for level-1 reduction routines such as XDOT, XMAX, and XSUM template <typename T> StatusCode PUBLIC_API TuneXdot(cl_command_queue* queue, const size_t n, const double fraction, std::unordered_map<std::string,size_t> &parameters); +// Tunes the "Xgemv" kernel, used for matrix-vector level-2 routines such as XGEMV, XGBMV, and XHEMV template <typename T> StatusCode PUBLIC_API TuneXgemv(cl_command_queue* queue, const size_t m, const size_t n, const double fraction, std::unordered_map<std::string,size_t> &parameters); +// Tunes the "Xger" kernel, used for matrix update level-2 routines such as XGER, XHER, and XSYR2 template <typename T> StatusCode PUBLIC_API TuneXger(cl_command_queue* queue, const size_t m, const size_t n, const double fraction, std::unordered_map<std::string,size_t> &parameters); +// Tunes the "Xgemm" kernel, used for most level-3 routines such as XGEMM, XSYMM, and XHER2K template <typename T> StatusCode PUBLIC_API TuneXgemm(cl_command_queue* queue, const size_t m, const size_t n, const size_t k, const double fraction, std::unordered_map<std::string,size_t> &parameters); +// Tunes the "XgemmDirect" kernel, used for most level-3 routines such as XGEMM, XSYMM, and XHER2K template <typename T> StatusCode PUBLIC_API TuneXgemmDirect(cl_command_queue* queue, const size_t m, const size_t n, const size_t k, const double fraction, 
std::unordered_map<std::string,size_t> &parameters); +// Tunes the "Copy" kernel, used for most level-3 routines such as XGEMM, XSYMM, and XHER2K template <typename T> StatusCode PUBLIC_API TuneCopy(cl_command_queue* queue, const size_t m, const size_t n, const double fraction, std::unordered_map<std::string,size_t> &parameters); +// Tunes the "Pad" kernel, used for most level-3 routines such as XGEMM, XSYMM, and XHER2K template <typename T> StatusCode PUBLIC_API TunePad(cl_command_queue* queue, const size_t m, const size_t n, const double fraction, std::unordered_map<std::string,size_t> &parameters); +// Tunes the "Transpose" kernel, used for most level-3 routines such as XGEMM, XSYMM, and XHER2K template <typename T> StatusCode PUBLIC_API TuneTranspose(cl_command_queue* queue, const size_t m, const size_t n, const double fraction, std::unordered_map<std::string,size_t> &parameters); +// Tunes the "Padtranspose" kernel, used for most level-3 routines such as XGEMM, XSYMM, and XHER2K template <typename T> StatusCode PUBLIC_API TunePadtranspose(cl_command_queue* queue, const size_t m, const size_t n, const double fraction, std::unordered_map<std::string,size_t> &parameters); +// Tunes the "Xgemm" kernel, used for the level-3 routine XTRSM template <typename T> StatusCode PUBLIC_API TuneInvert(cl_command_queue* queue, const size_t m, const size_t n, const size_t k, const double fraction, std::unordered_map<std::string,size_t> &parameters); |