summaryrefslogtreecommitdiff
path: root/include/clblast.h
diff options
context:
space:
mode:
authorCedric Nugteren <web@cedricnugteren.nl>2018-03-10 14:35:11 +0100
committerCedric Nugteren <web@cedricnugteren.nl>2018-03-10 14:35:11 +0100
commit3d2ef9331bd01bfc31d6725576c8ccb4eadbbc1c (patch)
tree8835c9dc1f02c58e8a9485a94d27fd2af123dc82 /include/clblast.h
parent0bdc51e47c97ef8c810d89cc8b2307bea4cbe852 (diff)
Fixed a few things for the new tuning API
Diffstat (limited to 'include/clblast.h')
-rw-r--r--include/clblast.h11
1 files changed, 11 insertions, 0 deletions
diff --git a/include/clblast.h b/include/clblast.h
index 34efc746..5129022e 100644
--- a/include/clblast.h
+++ b/include/clblast.h
@@ -705,46 +705,57 @@ StatusCode PUBLIC_API OverrideParameters(const cl_device_id device, const std::s
// =================================================================================================
+// Tunes the "Xaxpy" kernel, used for many level-1 routines such as XAXPY, XCOPY, and XSWAP
template <typename T>
StatusCode PUBLIC_API TuneXaxpy(cl_command_queue* queue, const size_t n,
const double fraction, std::unordered_map<std::string,size_t> &parameters);
+// Tunes the "Xdot" kernel, used for level-1 reduction routines such as XDOT, XMAX, and XSUM
template <typename T>
StatusCode PUBLIC_API TuneXdot(cl_command_queue* queue, const size_t n,
const double fraction, std::unordered_map<std::string,size_t> &parameters);
+// Tunes the "Xgemv" kernel, used for matrix-vector level-2 routines such as XGEMV, XGBMV, and XHEMV
template <typename T>
StatusCode PUBLIC_API TuneXgemv(cl_command_queue* queue, const size_t m, const size_t n,
const double fraction, std::unordered_map<std::string,size_t> &parameters);
+// Tunes the "Xger" kernel, used for matrix update level-2 routines such as XGER, XHER, and XSYR2
template <typename T>
StatusCode PUBLIC_API TuneXger(cl_command_queue* queue, const size_t m, const size_t n,
const double fraction, std::unordered_map<std::string,size_t> &parameters);
+// Tunes the "Xgemm" kernel, used for most level-3 routines such as XGEMM, XSYMM, and XHER2K
template <typename T>
StatusCode PUBLIC_API TuneXgemm(cl_command_queue* queue, const size_t m, const size_t n, const size_t k,
const double fraction, std::unordered_map<std::string,size_t> &parameters);
+// Tunes the "XgemmDirect" kernel, used for most level-3 routines such as XGEMM, XSYMM, and XHER2K
template <typename T>
StatusCode PUBLIC_API TuneXgemmDirect(cl_command_queue* queue, const size_t m, const size_t n, const size_t k,
const double fraction, std::unordered_map<std::string,size_t> &parameters);
+// Tunes the "Copy" kernel, used for most level-3 routines such as XGEMM, XSYMM, and XHER2K
template <typename T>
StatusCode PUBLIC_API TuneCopy(cl_command_queue* queue, const size_t m, const size_t n,
const double fraction, std::unordered_map<std::string,size_t> &parameters);
+// Tunes the "Pad" kernel, used for most level-3 routines such as XGEMM, XSYMM, and XHER2K
template <typename T>
StatusCode PUBLIC_API TunePad(cl_command_queue* queue, const size_t m, const size_t n,
const double fraction, std::unordered_map<std::string,size_t> &parameters);
+// Tunes the "Transpose" kernel, used for most level-3 routines such as XGEMM, XSYMM, and XHER2K
template <typename T>
StatusCode PUBLIC_API TuneTranspose(cl_command_queue* queue, const size_t m, const size_t n,
const double fraction, std::unordered_map<std::string,size_t> &parameters);
+// Tunes the "Padtranspose" kernel, used for most level-3 routines such as XGEMM, XSYMM, and XHER2K
template <typename T>
StatusCode PUBLIC_API TunePadtranspose(cl_command_queue* queue, const size_t m, const size_t n,
const double fraction, std::unordered_map<std::string,size_t> &parameters);
+// Tunes the "Invert" kernel, used for the level-3 routine XTRSM
template <typename T>
StatusCode PUBLIC_API TuneInvert(cl_command_queue* queue, const size_t m, const size_t n, const size_t k,
const double fraction, std::unordered_map<std::string,size_t> &parameters);