diff options
Diffstat (limited to 'src/routines')
87 files changed, 2252 insertions, 42 deletions
diff --git a/src/routines/common.cc b/src/routines/common.cc index 561a1bd8..c378df28 100644 --- a/src/routines/common.cc +++ b/src/routines/common.cc @@ -13,7 +13,7 @@ #include <vector> -#include "internal/routines/common.h" +#include "routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/src/routines/common.hpp b/src/routines/common.hpp new file mode 100644 index 00000000..c99cd39d --- /dev/null +++ b/src/routines/common.hpp @@ -0,0 +1,173 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file contains all the interfaces to common kernels, such as copying, padding, and +// transposing a matrix. These functions are templated and thus header-only. This file also contains +// other common functions to routines, such as a function to launch a kernel. 
+// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_COMMON_H_ +#define CLBLAST_ROUTINES_COMMON_H_ + +#include <string> +#include <vector> + +#include "clblast.h" +#include "clpp11.hpp" +#include "database/database.hpp" + +namespace clblast { +// ================================================================================================= +
+// Enqueues a kernel, waits for completion, and checks for errors. Arguments as declared: the
+// kernel, the queue and device, the global and local thread sizes, the event pointer, and a list
+// of events ('waitForEvents') to wait for.
+// NOTE(review): 'global' is taken by value whereas 'local' is a const reference - presumably the
+// implementation adjusts the global size; confirm in common.cc.
+StatusCode RunKernel(Kernel &kernel, Queue &queue, const Device &device,
+                     std::vector<size_t> global, const std::vector<size_t> &local,
+                     EventPointer event, std::vector<Event>& waitForEvents);
+
+// As above, but without an event waiting list
+StatusCode RunKernel(Kernel &kernel, Queue &queue, const Device &device,
+                     std::vector<size_t> global, const std::vector<size_t> &local,
+                     EventPointer event);
+ +// ================================================================================================= + +// Copies or transposes a matrix and optionally pads/unpads it with zeros. This method is also able +// to write to symmetric and triangular matrices through optional arguments.
+template <typename T>
+StatusCode PadCopyTransposeMatrix(Queue &queue, const Device &device, const Context &context,
+                                  const Database &db,
+                                  EventPointer event, std::vector<Event>& waitForEvents,
+                                  const size_t src_one, const size_t src_two,
+                                  const size_t src_ld, const size_t src_offset,
+                                  const Buffer<T> &src,
+                                  const size_t dest_one, const size_t dest_two,
+                                  const size_t dest_ld, const size_t dest_offset,
+                                  const Buffer<T> &dest,
+                                  const T alpha,
+                                  const Program &program, const bool do_pad,
+                                  const bool do_transpose, const bool do_conjugate,
+                                  const bool upper = false, const bool lower = false,
+                                  const bool diagonal_imag_zero = false) {
+
+  // First requirement for the fast kernels: no offsets, no conjugation, no triangular or
+  // diagonal options, and identical source and destination dimensions
+  const bool fast_candidate = (src_offset == 0) && (dest_offset == 0) && !do_conjugate &&
+                              (src_one == dest_one) && (src_two == dest_two) &&
+                              (src_ld == dest_ld) &&
+                              !upper && !lower && !diagonal_imag_zero;
+
+  // Second requirement: the sizes are multiples of the tuning parameters of the fast kernel. The
+  // database is only consulted when the first requirement holds.
+  auto use_fast_kernel = false;
+  if (fast_candidate) {
+    if (do_transpose) {
+      use_fast_kernel = IsMultiple(src_ld, db["TRA_WPT"]) &&
+                        IsMultiple(src_one, db["TRA_WPT"]*db["TRA_WPT"]) &&
+                        IsMultiple(src_two, db["TRA_WPT"]*db["TRA_WPT"]);
+    }
+    else {
+      use_fast_kernel = IsMultiple(src_ld, db["COPY_VW"]) &&
+                        IsMultiple(src_one, db["COPY_VW"]*db["COPY_DIMX"]) &&
+                        IsMultiple(src_two, db["COPY_WPT"]*db["COPY_DIMY"]);
+    }
+  }
+
+  // Selects the name of the kernel: fast, padding, or regular; transposing or copying
+  auto kernel_name = std::string{};
+  if (use_fast_kernel) {
+    kernel_name = (do_transpose) ? "TransposeMatrixFast" : "CopyMatrixFast";
+  }
+  else if (do_transpose) {
+    kernel_name = (do_pad) ? "TransposePadMatrix" : "TransposeMatrix";
+  }
+  else {
+    kernel_name = (do_pad) ? "CopyPadMatrix" : "CopyMatrix";
+  }
+
+  // The scalar is passed to the kernel through a one-element device buffer (needed for
+  // half-precision)
+  auto alpha_buffer = Buffer<T>(context, 1);
+  alpha_buffer.Write(queue, 1, &alpha);
+
+  // Any exception thrown while creating, configuring or launching the kernel is reported as an
+  // invalid-kernel status code
+  try {
+    auto kernel = Kernel(program, kernel_name);
+
+    // The fast kernels only take the leading dimension, the two matrices, and the scalar
+    if (use_fast_kernel) {
+      kernel.SetArgument(0, static_cast<int>(src_ld));
+      kernel.SetArgument(1, src());
+      kernel.SetArgument(2, dest());
+      kernel.SetArgument(3, alpha_buffer());
+    }
+
+    // The general kernels take the full description of both matrices
+    else {
+      kernel.SetArgument(0, static_cast<int>(src_one));
+      kernel.SetArgument(1, static_cast<int>(src_two));
+      kernel.SetArgument(2, static_cast<int>(src_ld));
+      kernel.SetArgument(3, static_cast<int>(src_offset));
+      kernel.SetArgument(4, src());
+      kernel.SetArgument(5, static_cast<int>(dest_one));
+      kernel.SetArgument(6, static_cast<int>(dest_two));
+      kernel.SetArgument(7, static_cast<int>(dest_ld));
+      kernel.SetArgument(8, static_cast<int>(dest_offset));
+      kernel.SetArgument(9, dest());
+      kernel.SetArgument(10, alpha_buffer());
+
+      // The trailing arguments differ: conjugation for the pad kernels, the triangular and
+      // diagonal options for the others
+      if (do_pad) {
+        kernel.SetArgument(11, static_cast<int>(do_conjugate));
+      }
+      else {
+        kernel.SetArgument(11, static_cast<int>(upper));
+        kernel.SetArgument(12, static_cast<int>(lower));
+        kernel.SetArgument(13, static_cast<int>(diagonal_imag_zero));
+      }
+    }
+
+    // Launches the kernel with thread sizes taken from the database and returns the resulting
+    // status code. Case 1: fast transpose
+    if (do_transpose && use_fast_kernel) {
+      const auto global = std::vector<size_t>{
+        dest_one / db["TRA_WPT"],
+        dest_two / db["TRA_WPT"]
+      };
+      const auto local = std::vector<size_t>{db["TRA_DIM"], db["TRA_DIM"]};
+      return RunKernel(kernel, queue, device, global, local, event, waitForEvents);
+    }
+
+    // Case 2: general (pad-)transpose
+    if (do_transpose) {
+      const auto global = std::vector<size_t>{
+        Ceil(CeilDiv(dest_one, db["PADTRA_WPT"]), db["PADTRA_TILE"]),
+        Ceil(CeilDiv(dest_two, db["PADTRA_WPT"]), db["PADTRA_TILE"])
+      };
+      const auto local = std::vector<size_t>{db["PADTRA_TILE"], db["PADTRA_TILE"]};
+      return RunKernel(kernel, queue, device, global, local, event, waitForEvents);
+    }
+
+    // Case 3: fast copy
+    if (use_fast_kernel) {
+      const auto global = std::vector<size_t>{
+        dest_one / db["COPY_VW"],
+        dest_two / db["COPY_WPT"]
+      };
+      const auto local = std::vector<size_t>{db["COPY_DIMX"], db["COPY_DIMY"]};
+      return RunKernel(kernel, queue, device, global, local, event, waitForEvents);
+    }
+
+    // Case 4: general (pad-)copy
+    const auto global = std::vector<size_t>{
+      Ceil(CeilDiv(dest_one, db["PAD_WPTX"]), db["PAD_DIMX"]),
+      Ceil(CeilDiv(dest_two, db["PAD_WPTY"]), db["PAD_DIMY"])
+    };
+    const auto local = std::vector<size_t>{db["PAD_DIMX"], db["PAD_DIMY"]};
+    return RunKernel(kernel, queue, device, global, local, event, waitForEvents);
+  } catch (...) { return StatusCode::kInvalidKernel; }
+}
+ +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_COMMON_H_ +#endif diff --git a/src/routines/level1/xamax.cc b/src/routines/level1/xamax.cc index b4add2a3..6b6e7f9e 100644 --- a/src/routines/level1/xamax.cc +++ b/src/routines/level1/xamax.cc @@ -11,7 +11,7 @@ // // ================================================================================================= -#include "internal/routines/level1/xamax.h" +#include "routines/level1/xamax.hpp" #include <string> #include <vector> diff --git a/src/routines/level1/xamax.hpp b/src/routines/level1/xamax.hpp new file mode 100644 index 00000000..aa45a8e4 --- /dev/null +++ b/src/routines/level1/xamax.hpp @@ -0,0 +1,40 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xamax routine. The precision is implemented using a template argument.
+// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XAMAX_H_ +#define CLBLAST_ROUTINES_XAMAX_H_ + +#include "routine.hpp" + +namespace clblast { +// ================================================================================================= +
+// Interface of the Xamax routine, templated on the data-type 'T' and derived from the 'Routine'
+// class. See comment at top of file for a description of the class
+template <typename T>
+class Xamax: public Routine {
+ public:
+
+  // Constructor: takes the queue and the event pointer; the routine name defaults to "AMAX"
+  Xamax(Queue &queue, EventPointer event, const std::string &name = "AMAX");
+
+  // Templated-precision implementation of the routine. Arguments as declared: the size 'n', the
+  // unsigned-integer 'imax' buffer with its offset, and the 'x' buffer with offset and increment.
+  // NOTE(review): presumably the resulting index is written to 'imax_buffer'; confirm in xamax.cc
+  StatusCode DoAmax(const size_t n,
+                    const Buffer<unsigned int> &imax_buffer, const size_t imax_offset,
+                    const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc);
+};
+ +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XAMAX_H_ +#endif diff --git a/src/routines/level1/xasum.cc b/src/routines/level1/xasum.cc index 80f04829..0c1ce903 100644 --- a/src/routines/level1/xasum.cc +++ b/src/routines/level1/xasum.cc @@ -11,7 +11,7 @@ // // ================================================================================================= -#include "internal/routines/level1/xasum.h" +#include "routines/level1/xasum.hpp" #include <string> #include <vector> diff --git a/src/routines/level1/xasum.hpp b/src/routines/level1/xasum.hpp new file mode 100644 index 00000000..5a253f4d --- /dev/null +++ b/src/routines/level1/xasum.hpp @@ -0,0 +1,40 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line.
+// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xasum routine. The precision is implemented using a template argument. +// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XASUM_H_ +#define CLBLAST_ROUTINES_XASUM_H_ + +#include "routine.hpp" + +namespace clblast { +// ================================================================================================= +
+// Interface of the Xasum routine, templated on the data-type 'T' and derived from the 'Routine'
+// class. See comment at top of file for a description of the class
+template <typename T>
+class Xasum: public Routine {
+ public:
+
+  // Constructor: takes the queue and the event pointer; the routine name defaults to "ASUM"
+  Xasum(Queue &queue, EventPointer event, const std::string &name = "ASUM");
+
+  // Templated-precision implementation of the routine. Arguments as declared: the size 'n', the
+  // 'asum' buffer with its offset, and the 'x' buffer with its offset and increment.
+  // NOTE(review): presumably the result is written to 'asum_buffer'; confirm in xasum.cc
+  StatusCode DoAsum(const size_t n,
+                    const Buffer<T> &asum_buffer, const size_t asum_offset,
+                    const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc);
+};
+ +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XASUM_H_ +#endif diff --git a/src/routines/level1/xaxpy.cc b/src/routines/level1/xaxpy.cc index 4a548757..5b6c9e77 100644 --- a/src/routines/level1/xaxpy.cc +++ b/src/routines/level1/xaxpy.cc @@ -11,7 +11,7 @@ // // ================================================================================================= -#include "internal/routines/level1/xaxpy.h" +#include "routines/level1/xaxpy.hpp" #include <string> #include <vector> diff --git a/src/routines/level1/xaxpy.hpp b/src/routines/level1/xaxpy.hpp new file mode 100644 index 00000000..caac871e --- /dev/null +++ b/src/routines/level1/xaxpy.hpp @@ -0,0 +1,40 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0.
This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xaxpy routine. The precision is implemented using a template argument. +// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XAXPY_H_ +#define CLBLAST_ROUTINES_XAXPY_H_ + +#include "routine.hpp" + +namespace clblast { +// ================================================================================================= +
+// Interface of the Xaxpy routine, templated on the data-type 'T' and derived from the 'Routine'
+// class. See comment at top of file for a description of the class
+template <typename T>
+class Xaxpy: public Routine {
+ public:
+
+  // Constructor: takes the queue and the event pointer; the routine name defaults to "AXPY"
+  Xaxpy(Queue &queue, EventPointer event, const std::string &name = "AXPY");
+
+  // Templated-precision implementation of the routine. Arguments as declared: the size 'n', the
+  // scalar 'alpha', and the 'x' and 'y' buffers, each with an offset and an increment.
+  StatusCode DoAxpy(const size_t n, const T alpha,
+                    const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+                    const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc);
+};
+ +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XAXPY_H_ +#endif diff --git a/src/routines/level1/xcopy.cc b/src/routines/level1/xcopy.cc index 92d31786..673ef349 100644 --- a/src/routines/level1/xcopy.cc +++ b/src/routines/level1/xcopy.cc @@ -11,7 +11,7 @@ // // ================================================================================================= -#include "internal/routines/level1/xcopy.h" +#include "routines/level1/xcopy.hpp" #include <string> #include <vector> diff --git a/src/routines/level1/xcopy.hpp b/src/routines/level1/xcopy.hpp new file mode 100644 index 00000000..0c424ba3 --- /dev/null +++ b/src/routines/level1/xcopy.hpp @@ -0,0 +1,40 @@ + +// ================================================================================================= +//
This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xcopy routine. The precision is implemented using a template argument. +// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XCOPY_H_ +#define CLBLAST_ROUTINES_XCOPY_H_ + +#include "routine.hpp" + +namespace clblast { +// ================================================================================================= +
+// Interface of the Xcopy routine, templated on the data-type 'T' and derived from the 'Routine'
+// class. See comment at top of file for a description of the class
+template <typename T>
+class Xcopy: public Routine {
+ public:
+
+  // Constructor: takes the queue and the event pointer; the routine name defaults to "COPY"
+  Xcopy(Queue &queue, EventPointer event, const std::string &name = "COPY");
+
+  // Templated-precision implementation of the routine. Arguments as declared: the size 'n' and
+  // the 'x' and 'y' buffers, each with an offset and an increment.
+  StatusCode DoCopy(const size_t n,
+                    const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+                    const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc);
+};
+ +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XCOPY_H_ +#endif diff --git a/src/routines/level1/xdot.cc b/src/routines/level1/xdot.cc index 8709c541..bafea157 100644 --- a/src/routines/level1/xdot.cc +++ b/src/routines/level1/xdot.cc @@ -11,7 +11,7 @@ // // ================================================================================================= -#include "internal/routines/level1/xdot.h" +#include "routines/level1/xdot.hpp" #include <string> #include <vector> diff --git a/src/routines/level1/xdot.hpp b/src/routines/level1/xdot.hpp new file mode 100644 index 00000000..02c1efaa --- /dev/null +++ b/src/routines/level1/xdot.hpp @@ -0,0 +1,42 @@ + +//
================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xdot routine. The precision is implemented using a template argument. +// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XDOT_H_ +#define CLBLAST_ROUTINES_XDOT_H_ + +#include "routine.hpp" + +namespace clblast { +// ================================================================================================= +
+// Interface of the Xdot routine, templated on the data-type 'T' and derived from the 'Routine'
+// class. See comment at top of file for a description of the class
+template <typename T>
+class Xdot: public Routine {
+ public:
+
+  // Constructor: takes the queue and the event pointer; the routine name defaults to "DOT"
+  Xdot(Queue &queue, EventPointer event, const std::string &name = "DOT");
+
+  // Templated-precision implementation of the routine. Arguments as declared: the size 'n', the
+  // 'dot' buffer with its offset, the 'x' and 'y' buffers (each with an offset and an increment),
+  // and an optional 'do_conjugate' flag which is off by default.
+  // NOTE(review): presumably the result is written to 'dot_buffer'; confirm in xdot.cc
+  StatusCode DoDot(const size_t n,
+                   const Buffer<T> &dot_buffer, const size_t dot_offset,
+                   const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+                   const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
+                   const bool do_conjugate = false);
+};
+ +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XDOT_H_ +#endif diff --git a/src/routines/level1/xdotc.cc b/src/routines/level1/xdotc.cc index b3a01079..27cf2bab 100644 --- a/src/routines/level1/xdotc.cc +++ b/src/routines/level1/xdotc.cc @@ -11,7 +11,7 @@ // // ================================================================================================= -#include "internal/routines/level1/xdotc.h" +#include "routines/level1/xdotc.hpp" #include <string> #include <vector> diff --git
a/src/routines/level1/xdotc.hpp b/src/routines/level1/xdotc.hpp new file mode 100644 index 00000000..b8cbdaf5 --- /dev/null +++ b/src/routines/level1/xdotc.hpp @@ -0,0 +1,44 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xdotc routine. The precision is implemented using a template argument. +// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XDOTC_H_ +#define CLBLAST_ROUTINES_XDOTC_H_ + +#include "routines/level1/xdot.hpp" + +namespace clblast { +// ================================================================================================= +
+// Interface of the Xdotc routine, templated on the data-type 'T' and derived from Xdot<T>. See
+// comment at top of file for a description of the class
+template <typename T>
+class Xdotc: public Xdot<T> {
+ public:
+
+  // Uses the regular Xdot routine: makes the DoDot method of the base class available here
+  using Xdot<T>::DoDot;
+
+  // Constructor: takes the queue and the event pointer; the routine name defaults to "DOTC"
+  Xdotc(Queue &queue, EventPointer event, const std::string &name = "DOTC");
+
+  // Templated-precision implementation of the routine. Same arguments as DoDot, but without the
+  // 'do_conjugate' flag.
+  // NOTE(review): presumably forwards to DoDot with conjugation enabled; confirm in xdotc.cc
+  StatusCode DoDotc(const size_t n,
+                    const Buffer<T> &dot_buffer, const size_t dot_offset,
+                    const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+                    const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc);
+};
+ +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XDOTC_H_ +#endif diff --git a/src/routines/level1/xdotu.cc b/src/routines/level1/xdotu.cc index 8dded6e0..0bce70b7 100644 --- a/src/routines/level1/xdotu.cc +++ b/src/routines/level1/xdotu.cc @@ -11,7 +11,7 @@ // //
================================================================================================= -#include "internal/routines/level1/xdotu.h" +#include "routines/level1/xdotu.hpp" #include <string> diff --git a/src/routines/level1/xdotu.hpp b/src/routines/level1/xdotu.hpp new file mode 100644 index 00000000..b3f73086 --- /dev/null +++ b/src/routines/level1/xdotu.hpp @@ -0,0 +1,44 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xdotu routine. The precision is implemented using a template argument. +// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XDOTU_H_ +#define CLBLAST_ROUTINES_XDOTU_H_ + +#include "routines/level1/xdot.hpp" + +namespace clblast { +// ================================================================================================= +
+// Interface of the Xdotu routine, templated on the data-type 'T' and derived from Xdot<T>. See
+// comment at top of file for a description of the class
+template <typename T>
+class Xdotu: public Xdot<T> {
+ public:
+
+  // Uses the regular Xdot routine: makes the DoDot method of the base class available here
+  using Xdot<T>::DoDot;
+
+  // Constructor: takes the queue and the event pointer; the routine name defaults to "DOTU"
+  Xdotu(Queue &queue, EventPointer event, const std::string &name = "DOTU");
+
+  // Templated-precision implementation of the routine. Same arguments as DoDot, but without the
+  // 'do_conjugate' flag.
+  // NOTE(review): presumably forwards to DoDot without conjugation; confirm in xdotu.cc
+  StatusCode DoDotu(const size_t n,
+                    const Buffer<T> &dot_buffer, const size_t dot_offset,
+                    const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+                    const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc);
+};
+ +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XDOTU_H_ +#endif diff --git
a/src/routines/level1/xmax.hpp b/src/routines/level1/xmax.hpp new file mode 100644 index 00000000..5a0236f2 --- /dev/null +++ b/src/routines/level1/xmax.hpp @@ -0,0 +1,49 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xmax routine. The precision is implemented using a template argument. +// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XMAX_H_ +#define CLBLAST_ROUTINES_XMAX_H_ + +#include "routine.hpp" +#include "routines/level1/xamax.hpp" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template <typename T> +class Xmax: public Xamax<T> { + public: + + // Members and methods from the base class + using Xamax<T>::DoAmax; + + // Constructor + Xmax(Queue &queue, EventPointer event, const std::string &name = "MAX"): + Xamax<T>(queue, event, name) { + } + + // Forwards to the regular absolute version. The implementation difference is realised in the + // kernel through a pre-processor macro based on the name of the routine. 
+ StatusCode DoMax(const size_t n, + const Buffer<unsigned int> &imax_buffer, const size_t imax_offset, + const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc) { + return DoAmax(n, imax_buffer, imax_offset, x_buffer, x_offset, x_inc); + } +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XMAX_H_ +#endif diff --git a/src/routines/level1/xmin.hpp b/src/routines/level1/xmin.hpp new file mode 100644 index 00000000..6befec64 --- /dev/null +++ b/src/routines/level1/xmin.hpp @@ -0,0 +1,49 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xmin routine. The precision is implemented using a template argument. +// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XMIN_H_ +#define CLBLAST_ROUTINES_XMIN_H_ + +#include "routine.hpp" +#include "routines/level1/xamax.hpp" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template <typename T> +class Xmin: public Xamax<T> { + public: + + // Members and methods from the base class + using Xamax<T>::DoAmax; + + // Constructor + Xmin(Queue &queue, EventPointer event, const std::string &name = "MIN"): + Xamax<T>(queue, event, name) { + } + + // Forwards to the regular max-absolute version. The implementation difference is realised in the + // kernel through a pre-processor macro based on the name of the routine. 
+ StatusCode DoMin(const size_t n, + const Buffer<unsigned int> &imin_buffer, const size_t imin_offset, + const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc) { + return DoAmax(n, imin_buffer, imin_offset, x_buffer, x_offset, x_inc); + } +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XMIN_H_ +#endif diff --git a/src/routines/level1/xnrm2.cc b/src/routines/level1/xnrm2.cc index 105f991c..97615d8b 100644 --- a/src/routines/level1/xnrm2.cc +++ b/src/routines/level1/xnrm2.cc @@ -11,7 +11,7 @@ // // ================================================================================================= -#include "internal/routines/level1/xnrm2.h" +#include "routines/level1/xnrm2.hpp" #include <string> #include <vector> diff --git a/src/routines/level1/xnrm2.hpp b/src/routines/level1/xnrm2.hpp new file mode 100644 index 00000000..7baf07f5 --- /dev/null +++ b/src/routines/level1/xnrm2.hpp @@ -0,0 +1,40 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xnrm2 routine. The precision is implemented using a template argument. 
+// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XNRM2_H_ +#define CLBLAST_ROUTINES_XNRM2_H_ + +#include "routine.hpp" + +namespace clblast { +// ================================================================================================= +
+// Interface of the Xnrm2 routine, templated on the data-type 'T' and derived from the 'Routine'
+// class. See comment at top of file for a description of the class
+template <typename T>
+class Xnrm2: public Routine {
+ public:
+
+  // Constructor: takes the queue and the event pointer; the routine name defaults to "NRM2"
+  Xnrm2(Queue &queue, EventPointer event, const std::string &name = "NRM2");
+
+  // Templated-precision implementation of the routine. Arguments as declared: the size 'n', the
+  // 'nrm2' buffer with its offset, and the 'x' buffer with its offset and increment.
+  // NOTE(review): presumably the result is written to 'nrm2_buffer'; confirm in xnrm2.cc
+  StatusCode DoNrm2(const size_t n,
+                    const Buffer<T> &nrm2_buffer, const size_t nrm2_offset,
+                    const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc);
+};
+ +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XNRM2_H_ +#endif diff --git a/src/routines/level1/xscal.cc b/src/routines/level1/xscal.cc index 3c1b5257..bcc43c3b 100644 --- a/src/routines/level1/xscal.cc +++ b/src/routines/level1/xscal.cc @@ -11,7 +11,7 @@ // // ================================================================================================= -#include "internal/routines/level1/xscal.h" +#include "routines/level1/xscal.hpp" #include <string> #include <vector> diff --git a/src/routines/level1/xscal.hpp b/src/routines/level1/xscal.hpp new file mode 100644 index 00000000..6c585cb2 --- /dev/null +++ b/src/routines/level1/xscal.hpp @@ -0,0 +1,39 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line.
+// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xscal routine. The precision is implemented using a template argument. +// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XSCAL_H_ +#define CLBLAST_ROUTINES_XSCAL_H_ + +#include "routine.hpp" + +namespace clblast { +// ================================================================================================= +
+// Interface of the Xscal routine, templated on the data-type 'T' and derived from the 'Routine'
+// class. See comment at top of file for a description of the class
+template <typename T>
+class Xscal: public Routine {
+ public:
+
+  // Constructor: takes the queue and the event pointer; the routine name defaults to "SCAL"
+  Xscal(Queue &queue, EventPointer event, const std::string &name = "SCAL");
+
+  // Templated-precision implementation of the routine. Arguments as declared: the size 'n', the
+  // scalar 'alpha', and the 'x' buffer with its offset and increment.
+  StatusCode DoScal(const size_t n, const T alpha,
+                    const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc);
+};
+ +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XSCAL_H_ +#endif diff --git a/src/routines/level1/xsum.hpp b/src/routines/level1/xsum.hpp new file mode 100644 index 00000000..84e20bea --- /dev/null +++ b/src/routines/level1/xsum.hpp @@ -0,0 +1,49 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xsum routine. The precision is implemented using a template argument.
+// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XSUM_H_ +#define CLBLAST_ROUTINES_XSUM_H_ + +#include "routine.hpp" +#include "routines/level1/xasum.hpp" + +namespace clblast { +// ================================================================================================= +
+// The Xsum routine: a thin layer on top of Xasum which re-uses its implementation under a
+// different routine name
+template <typename T>
+class Xsum: public Xasum<T> {
+ public:
+
+  // Makes the implementation of the base class available
+  using Xasum<T>::DoAsum;
+
+  // Constructor: hands all arguments to the base class, with "SUM" as the default routine name
+  Xsum(Queue &queue, EventPointer event, const std::string &name = "SUM"):
+      Xasum<T>(queue, event, name) {}
+
+  // Delegates to DoAsum with unchanged arguments. The difference with the absolute version is
+  // made in the kernel, through a pre-processor macro based on the name of the routine.
+  StatusCode DoSum(const size_t n,
+                   const Buffer<T> &sum_buffer, const size_t sum_offset,
+                   const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc) {
+    const auto status = DoAsum(n, sum_buffer, sum_offset, x_buffer, x_offset, x_inc);
+    return status;
+  }
+};
+ +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XSUM_H_ +#endif diff --git a/src/routines/level1/xswap.cc b/src/routines/level1/xswap.cc index 27eb9b13..03907cbd 100644 --- a/src/routines/level1/xswap.cc +++ b/src/routines/level1/xswap.cc @@ -11,7 +11,7 @@ // // ================================================================================================= -#include "internal/routines/level1/xswap.h" +#include "routines/level1/xswap.hpp" #include <string> #include <vector> diff --git a/src/routines/level1/xswap.hpp b/src/routines/level1/xswap.hpp new file mode 100644 index 00000000..4f9ea36d --- /dev/null +++ b/src/routines/level1/xswap.hpp @@ -0,0 +1,40 @@ + +// ================================================================================================= +// This
file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xswap routine. The precision is implemented using a template argument. +// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XSWAP_H_ +#define CLBLAST_ROUTINES_XSWAP_H_ + +#include "routine.hpp" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template <typename T> +class Xswap: public Routine { + public: + + // Constructor + Xswap(Queue &queue, EventPointer event, const std::string &name = "SWAP"); + + // Templated-precision implementation of the routine + StatusCode DoSwap(const size_t n, + const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc, + const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc); +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XSWAP_H_ +#endif diff --git a/src/routines/level2/xgbmv.cc b/src/routines/level2/xgbmv.cc index 7a30c34a..ea4f001c 100644 --- a/src/routines/level2/xgbmv.cc +++ b/src/routines/level2/xgbmv.cc @@ -11,7 +11,7 @@ // // ================================================================================================= -#include "internal/routines/level2/xgbmv.h" +#include "routines/level2/xgbmv.hpp" #include <string> #include <vector> diff --git a/src/routines/level2/xgbmv.hpp b/src/routines/level2/xgbmv.hpp new file mode 100644 index 00000000..686ab642 --- /dev/null +++ b/src/routines/level2/xgbmv.hpp @@ -0,0 +1,49 @@ + +// 
================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xgbmv routine. It is based on the generalized mat-vec multiplication +// routine (Xgemv). The Xgbmv class inherits from the templated class Xgemv, allowing it to call the +// "MatVec" function directly. +// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XGBMV_H_ +#define CLBLAST_ROUTINES_XGBMV_H_ + +#include "routines/level2/xgemv.hpp" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template <typename T> +class Xgbmv: public Xgemv<T> { + public: + + // Uses the generic matrix-vector routine + using Xgemv<T>::MatVec; + + // Constructor + Xgbmv(Queue &queue, EventPointer event, const std::string &name = "GBMV"); + + // Templated-precision implementation of the routine + StatusCode DoGbmv(const Layout layout, const Transpose a_transpose, + const size_t m, const size_t n, const size_t kl, const size_t ku, + const T alpha, + const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld, + const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc, + const T beta, + const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc); +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XGBMV_H_ +#endif diff --git a/src/routines/level2/xgemv.cc b/src/routines/level2/xgemv.cc index ccadd131..21fb397c 100644 --- 
a/src/routines/level2/xgemv.cc +++ b/src/routines/level2/xgemv.cc @@ -11,7 +11,7 @@ // // ================================================================================================= -#include "internal/routines/level2/xgemv.h" +#include "routines/level2/xgemv.hpp" #include <string> #include <vector> diff --git a/src/routines/level2/xgemv.hpp b/src/routines/level2/xgemv.hpp new file mode 100644 index 00000000..e9afec8d --- /dev/null +++ b/src/routines/level2/xgemv.hpp @@ -0,0 +1,56 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xgemv routine. The precision is implemented using a template argument. 
+// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XGEMV_H_ +#define CLBLAST_ROUTINES_XGEMV_H_ + +#include "routine.hpp" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template <typename T> +class Xgemv: public Routine { + public: + + // Constructor + Xgemv(Queue &queue, EventPointer event, const std::string &name = "GEMV"); + + // Templated-precision implementation of the routine + StatusCode DoGemv(const Layout layout, const Transpose a_transpose, + const size_t m, const size_t n, + const T alpha, + const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld, + const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc, + const T beta, + const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc); + + // Generic version used also for other matrix-vector multiplications + StatusCode MatVec(const Layout layout, const Transpose a_transpose, + const size_t m, const size_t n, + const T alpha, + const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld, + const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc, + const T beta, + const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc, + bool fast_kernel, bool fast_kernel_rot, + const size_t parameter, const bool packed, + const size_t kl, const size_t ku); +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XGEMV_H_ +#endif diff --git a/src/routines/level2/xger.cc b/src/routines/level2/xger.cc index 6ceaa00e..353047d2 100644 --- a/src/routines/level2/xger.cc +++ b/src/routines/level2/xger.cc @@ -11,7 +11,7 @@ // // ================================================================================================= -#include 
"internal/routines/level2/xger.h" +#include "routines/level2/xger.hpp" #include <string> #include <vector> diff --git a/src/routines/level2/xger.hpp b/src/routines/level2/xger.hpp new file mode 100644 index 00000000..3c6abe44 --- /dev/null +++ b/src/routines/level2/xger.hpp @@ -0,0 +1,43 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xger routine. The precision is implemented using a template argument. +// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XGER_H_ +#define CLBLAST_ROUTINES_XGER_H_ + +#include "routine.hpp" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template <typename T> +class Xger: public Routine { + public: + + // Constructor + Xger(Queue &queue, EventPointer event, const std::string &name = "GER"); + + // Templated-precision implementation of the routine + StatusCode DoGer(const Layout layout, + const size_t m, const size_t n, + const T alpha, + const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc, + const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc, + const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld); +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XGER_H_ +#endif diff --git a/src/routines/level2/xgerc.cc b/src/routines/level2/xgerc.cc index 73284b52..d9feda97 100644 --- 
a/src/routines/level2/xgerc.cc +++ b/src/routines/level2/xgerc.cc @@ -11,7 +11,7 @@ // // ================================================================================================= -#include "internal/routines/level2/xgerc.h" +#include "routines/level2/xgerc.hpp" #include <string> diff --git a/src/routines/level2/xgerc.hpp b/src/routines/level2/xgerc.hpp new file mode 100644 index 00000000..f1d04dfd --- /dev/null +++ b/src/routines/level2/xgerc.hpp @@ -0,0 +1,46 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xgerc routine. The precision is implemented using a template argument. +// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XGERC_H_ +#define CLBLAST_ROUTINES_XGERC_H_ + +#include "routines/level2/xger.hpp" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template <typename T> +class Xgerc: public Xger<T> { + public: + + // Uses the regular Xger routine + using Xger<T>::DoGer; + + // Constructor + Xgerc(Queue &queue, EventPointer event, const std::string &name = "GERC"); + + // Templated-precision implementation of the routine + StatusCode DoGerc(const Layout layout, + const size_t m, const size_t n, + const T alpha, + const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc, + const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc, + const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld); +}; + +// 
================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XGERC_H_ +#endif diff --git a/src/routines/level2/xgeru.cc b/src/routines/level2/xgeru.cc index 7730d6a5..da9e91c2 100644 --- a/src/routines/level2/xgeru.cc +++ b/src/routines/level2/xgeru.cc @@ -11,7 +11,7 @@ // // ================================================================================================= -#include "internal/routines/level2/xgeru.h" +#include "routines/level2/xgeru.hpp" #include <string> diff --git a/src/routines/level2/xgeru.hpp b/src/routines/level2/xgeru.hpp new file mode 100644 index 00000000..fb50e917 --- /dev/null +++ b/src/routines/level2/xgeru.hpp @@ -0,0 +1,46 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xgeru routine. The precision is implemented using a template argument. 
+// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XGERU_H_ +#define CLBLAST_ROUTINES_XGERU_H_ + +#include "routines/level2/xger.hpp" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template <typename T> +class Xgeru: public Xger<T> { + public: + + // Uses the regular Xger routine + using Xger<T>::DoGer; + + // Constructor + Xgeru(Queue &queue, EventPointer event, const std::string &name = "GERU"); + + // Templated-precision implementation of the routine + StatusCode DoGeru(const Layout layout, + const size_t m, const size_t n, + const T alpha, + const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc, + const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc, + const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld); +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XGERU_H_ +#endif diff --git a/src/routines/level2/xhbmv.cc b/src/routines/level2/xhbmv.cc index 58591b50..f6c0e3c4 100644 --- a/src/routines/level2/xhbmv.cc +++ b/src/routines/level2/xhbmv.cc @@ -11,7 +11,7 @@ // // ================================================================================================= -#include "internal/routines/level2/xhbmv.h" +#include "routines/level2/xhbmv.hpp" #include <string> #include <vector> diff --git a/src/routines/level2/xhbmv.hpp b/src/routines/level2/xhbmv.hpp new file mode 100644 index 00000000..d668eb88 --- /dev/null +++ b/src/routines/level2/xhbmv.hpp @@ -0,0 +1,49 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. 
This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xhbmv routine. It is based on the generalized mat-vec multiplication +// routine (Xgemv). The Xhbmv class inherits from the templated class Xgemv, allowing it to call the +// "MatVec" function directly. +// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XHBMV_H_ +#define CLBLAST_ROUTINES_XHBMV_H_ + +#include "routines/level2/xgemv.hpp" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template <typename T> +class Xhbmv: public Xgemv<T> { + public: + + // Uses the generic matrix-vector routine + using Xgemv<T>::MatVec; + + // Constructor + Xhbmv(Queue &queue, EventPointer event, const std::string &name = "HBMV"); + + // Templated-precision implementation of the routine + StatusCode DoHbmv(const Layout layout, const Triangle triangle, + const size_t n, const size_t k, + const T alpha, + const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld, + const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc, + const T beta, + const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc); +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XHBMV_H_ +#endif diff --git a/src/routines/level2/xhemv.cc b/src/routines/level2/xhemv.cc index b4ef0fa4..2cbcf7b4 100644 --- a/src/routines/level2/xhemv.cc +++ b/src/routines/level2/xhemv.cc @@ -11,7 +11,7 @@ // // ================================================================================================= -#include 
"internal/routines/level2/xhemv.h" +#include "routines/level2/xhemv.hpp" #include <string> #include <vector> diff --git a/src/routines/level2/xhemv.hpp b/src/routines/level2/xhemv.hpp new file mode 100644 index 00000000..8e062fd3 --- /dev/null +++ b/src/routines/level2/xhemv.hpp @@ -0,0 +1,49 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xhemv routine. It is based on the generalized mat-vec multiplication +// routine (Xgemv). The Xhemv class inherits from the templated class Xgemv, allowing it to call the +// "MatVec" function directly. +// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XHEMV_H_ +#define CLBLAST_ROUTINES_XHEMV_H_ + +#include "routines/level2/xgemv.hpp" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template <typename T> +class Xhemv: public Xgemv<T> { + public: + + // Uses the generic matrix-vector routine + using Xgemv<T>::MatVec; + + // Constructor + Xhemv(Queue &queue, EventPointer event, const std::string &name = "HEMV"); + + // Templated-precision implementation of the routine + StatusCode DoHemv(const Layout layout, const Triangle triangle, + const size_t n, + const T alpha, + const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld, + const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc, + const T beta, + const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc); +}; + +// 
================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XHEMV_H_ +#endif diff --git a/src/routines/level2/xher.cc b/src/routines/level2/xher.cc index 939e17bb..ed8ba9e9 100644 --- a/src/routines/level2/xher.cc +++ b/src/routines/level2/xher.cc @@ -11,7 +11,7 @@ // // ================================================================================================= -#include "internal/routines/level2/xher.h" +#include "routines/level2/xher.hpp" #include <string> diff --git a/src/routines/level2/xher.hpp b/src/routines/level2/xher.hpp new file mode 100644 index 00000000..9ff6bf3f --- /dev/null +++ b/src/routines/level2/xher.hpp @@ -0,0 +1,46 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xher routine. The precision is implemented using a template argument. 
+// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XHER_H_ +#define CLBLAST_ROUTINES_XHER_H_ + +#include "routine.hpp" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template <typename T, typename U> +class Xher: public Routine { + public: + + // Constructor + Xher(Queue &queue, EventPointer event, const std::string &name = "HER"); + + // Translates alpha of type 'U' into type 'T' + T GetAlpha(const U alpha); + + // Templated-precision implementation of the routine + StatusCode DoHer(const Layout layout, const Triangle triangle, + const size_t n, + const U alpha, + const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc, + const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld, + const bool packed = false); +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XHER_H_ +#endif diff --git a/src/routines/level2/xher2.cc b/src/routines/level2/xher2.cc index 95dbd87a..50572cea 100644 --- a/src/routines/level2/xher2.cc +++ b/src/routines/level2/xher2.cc @@ -11,7 +11,7 @@ // // ================================================================================================= -#include "internal/routines/level2/xher2.h" +#include "routines/level2/xher2.hpp" #include <string> diff --git a/src/routines/level2/xher2.hpp b/src/routines/level2/xher2.hpp new file mode 100644 index 00000000..8c53c047 --- /dev/null +++ b/src/routines/level2/xher2.hpp @@ -0,0 +1,44 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. 
This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xher2 routine. The precision is implemented using a template argument. +// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XHER2_H_ +#define CLBLAST_ROUTINES_XHER2_H_ + +#include "routine.hpp" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template <typename T> +class Xher2: public Routine { + public: + + // Constructor + Xher2(Queue &queue, EventPointer event, const std::string &name = "HER2"); + + // Templated-precision implementation of the routine + StatusCode DoHer2(const Layout layout, const Triangle triangle, + const size_t n, + const T alpha, + const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc, + const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc, + const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld, + const bool packed = false); +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XHER2_H_ +#endif diff --git a/src/routines/level2/xhpmv.cc b/src/routines/level2/xhpmv.cc index 92686dbe..e6f82b34 100644 --- a/src/routines/level2/xhpmv.cc +++ b/src/routines/level2/xhpmv.cc @@ -11,7 +11,7 @@ // // ================================================================================================= -#include "internal/routines/level2/xhpmv.h" +#include "routines/level2/xhpmv.hpp" #include <string> #include <vector> diff --git a/src/routines/level2/xhpmv.hpp b/src/routines/level2/xhpmv.hpp new file mode 100644 index 00000000..b11192f9 --- /dev/null +++ 
b/src/routines/level2/xhpmv.hpp @@ -0,0 +1,49 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xhpmv routine. It is based on the generalized mat-vec multiplication +// routine (Xgemv). The Xhpmv class inherits from the templated class Xgemv, allowing it to call the +// "MatVec" function directly. +// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XHPMV_H_ +#define CLBLAST_ROUTINES_XHPMV_H_ + +#include "routines/level2/xgemv.hpp" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template <typename T> +class Xhpmv: public Xgemv<T> { + public: + + // Uses the generic matrix-vector routine + using Xgemv<T>::MatVec; + + // Constructor + Xhpmv(Queue &queue, EventPointer event, const std::string &name = "HPMV"); + + // Templated-precision implementation of the routine + StatusCode DoHpmv(const Layout layout, const Triangle triangle, + const size_t n, + const T alpha, + const Buffer<T> &ap_buffer, const size_t ap_offset, + const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc, + const T beta, + const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc); +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XHPMV_H_ +#endif diff --git a/src/routines/level2/xhpr.cc b/src/routines/level2/xhpr.cc index 4b31ad09..225ebfe5 100644 --- 
a/src/routines/level2/xhpr.cc +++ b/src/routines/level2/xhpr.cc @@ -11,7 +11,7 @@ // // ================================================================================================= -#include "internal/routines/level2/xhpr.h" +#include "routines/level2/xhpr.hpp" #include <string> diff --git a/src/routines/level2/xhpr.hpp b/src/routines/level2/xhpr.hpp new file mode 100644 index 00000000..37801c68 --- /dev/null +++ b/src/routines/level2/xhpr.hpp @@ -0,0 +1,45 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xhpr routine. The precision is implemented using a template argument. +// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XHPR_H_ +#define CLBLAST_ROUTINES_XHPR_H_ + +#include "routines/level2/xher.hpp" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template <typename T, typename U> +class Xhpr: public Xher<T,U> { + public: + + // Uses the regular Xher routine + using Xher<T,U>::DoHer; + + // Constructor + Xhpr(Queue &queue, EventPointer event, const std::string &name = "HPR"); + + // Templated-precision implementation of the routine + StatusCode DoHpr(const Layout layout, const Triangle triangle, + const size_t n, + const U alpha, + const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc, + const Buffer<T> &ap_buffer, const size_t ap_offset); +}; + +// 
================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XHPR_H_ +#endif diff --git a/src/routines/level2/xhpr2.cc b/src/routines/level2/xhpr2.cc index 9be24f43..85f9d3f9 100644 --- a/src/routines/level2/xhpr2.cc +++ b/src/routines/level2/xhpr2.cc @@ -11,7 +11,7 @@ // // ================================================================================================= -#include "internal/routines/level2/xhpr2.h" +#include "routines/level2/xhpr2.hpp" #include <string> diff --git a/src/routines/level2/xhpr2.hpp b/src/routines/level2/xhpr2.hpp new file mode 100644 index 00000000..d66dce55 --- /dev/null +++ b/src/routines/level2/xhpr2.hpp @@ -0,0 +1,46 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xhpr2 routine. The precision is implemented using a template argument. 
+// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XHPR2_H_ +#define CLBLAST_ROUTINES_XHPR2_H_ + +#include "routines/level2/xher2.hpp" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template <typename T> +class Xhpr2: public Xher2<T> { + public: + + // Uses the regular Xher2 routine + using Xher2<T>::DoHer2; + + // Constructor + Xhpr2(Queue &queue, EventPointer event, const std::string &name = "HPR2"); + + // Templated-precision implementation of the routine + StatusCode DoHpr2(const Layout layout, const Triangle triangle, + const size_t n, + const T alpha, + const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc, + const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc, + const Buffer<T> &ap_buffer, const size_t ap_offset); +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XHPR2_H_ +#endif diff --git a/src/routines/level2/xsbmv.cc b/src/routines/level2/xsbmv.cc index 66ba74e8..28730899 100644 --- a/src/routines/level2/xsbmv.cc +++ b/src/routines/level2/xsbmv.cc @@ -11,7 +11,7 @@ // // ================================================================================================= -#include "internal/routines/level2/xsbmv.h" +#include "routines/level2/xsbmv.hpp" #include <string> #include <vector> diff --git a/src/routines/level2/xsbmv.hpp b/src/routines/level2/xsbmv.hpp new file mode 100644 index 00000000..16c5e9a8 --- /dev/null +++ b/src/routines/level2/xsbmv.hpp @@ -0,0 +1,49 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. 
This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xsbmv routine. It is based on the generalized mat-vec multiplication +// routine (Xgemv). The Xsbmv class inherits from the templated class Xgemv, allowing it to call the +// "MatVec" function directly. +// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XSBMV_H_ +#define CLBLAST_ROUTINES_XSBMV_H_ + +#include "routines/level2/xgemv.hpp" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template <typename T> +class Xsbmv: public Xgemv<T> { + public: + + // Uses the generic matrix-vector routine + using Xgemv<T>::MatVec; + + // Constructor + Xsbmv(Queue &queue, EventPointer event, const std::string &name = "SBMV"); + + // Templated-precision implementation of the routine + StatusCode DoSbmv(const Layout layout, const Triangle triangle, + const size_t n, const size_t k, + const T alpha, + const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld, + const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc, + const T beta, + const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc); +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XSBMV_H_ +#endif diff --git a/src/routines/level2/xspmv.cc b/src/routines/level2/xspmv.cc index 589a97d4..f6651012 100644 --- a/src/routines/level2/xspmv.cc +++ b/src/routines/level2/xspmv.cc @@ -11,7 +11,7 @@ // // ================================================================================================= -#include 
"internal/routines/level2/xspmv.h" +#include "routines/level2/xspmv.hpp" #include <string> #include <vector> diff --git a/src/routines/level2/xspmv.hpp b/src/routines/level2/xspmv.hpp new file mode 100644 index 00000000..a0c69b85 --- /dev/null +++ b/src/routines/level2/xspmv.hpp @@ -0,0 +1,49 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xspmv routine. It is based on the generalized mat-vec multiplication +// routine (Xgemv). The Xspmv class inherits from the templated class Xgemv, allowing it to call the +// "MatVec" function directly. +// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XSPMV_H_ +#define CLBLAST_ROUTINES_XSPMV_H_ + +#include "routines/level2/xgemv.hpp" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template <typename T> +class Xspmv: public Xgemv<T> { + public: + + // Uses the generic matrix-vector routine + using Xgemv<T>::MatVec; + + // Constructor + Xspmv(Queue &queue, EventPointer event, const std::string &name = "SPMV"); + + // Templated-precision implementation of the routine + StatusCode DoSpmv(const Layout layout, const Triangle triangle, + const size_t n, + const T alpha, + const Buffer<T> &ap_buffer, const size_t ap_offset, + const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc, + const T beta, + const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc); +}; + +// 
================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XSPMV_H_ +#endif diff --git a/src/routines/level2/xspr.cc b/src/routines/level2/xspr.cc index c556b920..a75fe9c3 100644 --- a/src/routines/level2/xspr.cc +++ b/src/routines/level2/xspr.cc @@ -11,7 +11,7 @@ // // ================================================================================================= -#include "internal/routines/level2/xspr.h" +#include "routines/level2/xspr.hpp" #include <string> diff --git a/src/routines/level2/xspr.hpp b/src/routines/level2/xspr.hpp new file mode 100644 index 00000000..6468c736 --- /dev/null +++ b/src/routines/level2/xspr.hpp @@ -0,0 +1,45 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xspr routine. The precision is implemented using a template argument. 
+// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XSPR_H_ +#define CLBLAST_ROUTINES_XSPR_H_ + +#include "routines/level2/xher.hpp" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template <typename T> +class Xspr: public Xher<T,T> { + public: + + // Uses the regular Xher routine + using Xher<T,T>::DoHer; + + // Constructor + Xspr(Queue &queue, EventPointer event, const std::string &name = "SPR"); + + // Templated-precision implementation of the routine + StatusCode DoSpr(const Layout layout, const Triangle triangle, + const size_t n, + const T alpha, + const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc, + const Buffer<T> &ap_buffer, const size_t ap_offset); +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XSPR_H_ +#endif diff --git a/src/routines/level2/xspr2.cc b/src/routines/level2/xspr2.cc index c4ad5dc4..c39a2eb4 100644 --- a/src/routines/level2/xspr2.cc +++ b/src/routines/level2/xspr2.cc @@ -11,7 +11,7 @@ // // ================================================================================================= -#include "internal/routines/level2/xspr2.h" +#include "routines/level2/xspr2.hpp" #include <string> diff --git a/src/routines/level2/xspr2.hpp b/src/routines/level2/xspr2.hpp new file mode 100644 index 00000000..693c56a1 --- /dev/null +++ b/src/routines/level2/xspr2.hpp @@ -0,0 +1,46 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. 
This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xspr2 routine. The precision is implemented using a template argument. +// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XSPR2_H_ +#define CLBLAST_ROUTINES_XSPR2_H_ + +#include "routines/level2/xher2.hpp" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template <typename T> +class Xspr2: public Xher2<T> { + public: + + // Uses the regular Xher2 routine + using Xher2<T>::DoHer2; + + // Constructor + Xspr2(Queue &queue, EventPointer event, const std::string &name = "SPR2"); + + // Templated-precision implementation of the routine + StatusCode DoSpr2(const Layout layout, const Triangle triangle, + const size_t n, + const T alpha, + const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc, + const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc, + const Buffer<T> &ap_buffer, const size_t ap_offset); +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XSPR2_H_ +#endif diff --git a/src/routines/level2/xsymv.cc b/src/routines/level2/xsymv.cc index 2a404a8a..648d2a3e 100644 --- a/src/routines/level2/xsymv.cc +++ b/src/routines/level2/xsymv.cc @@ -11,7 +11,7 @@ // // ================================================================================================= -#include "internal/routines/level2/xsymv.h" +#include "routines/level2/xsymv.hpp" #include <string> #include <vector> diff --git a/src/routines/level2/xsymv.hpp b/src/routines/level2/xsymv.hpp new file mode 100644 index 
00000000..67815f2f --- /dev/null +++ b/src/routines/level2/xsymv.hpp @@ -0,0 +1,49 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xsymv routine. It is based on the generalized mat-vec multiplication +// routine (Xgemv). The Xsymv class inherits from the templated class Xgemv, allowing it to call the +// "MatVec" function directly. +// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XSYMV_H_ +#define CLBLAST_ROUTINES_XSYMV_H_ + +#include "routines/level2/xgemv.hpp" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template <typename T> +class Xsymv: public Xgemv<T> { + public: + + // Uses the generic matrix-vector routine + using Xgemv<T>::MatVec; + + // Constructor + Xsymv(Queue &queue, EventPointer event, const std::string &name = "SYMV"); + + // Templated-precision implementation of the routine + StatusCode DoSymv(const Layout layout, const Triangle triangle, + const size_t n, + const T alpha, + const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld, + const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc, + const T beta, + const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc); +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XSYMV_H_ +#endif diff --git a/src/routines/level2/xsyr.cc b/src/routines/level2/xsyr.cc 
index 892517d7..758d8f8f 100644 --- a/src/routines/level2/xsyr.cc +++ b/src/routines/level2/xsyr.cc @@ -11,7 +11,7 @@ // // ================================================================================================= -#include "internal/routines/level2/xsyr.h" +#include "routines/level2/xsyr.hpp" #include <string> diff --git a/src/routines/level2/xsyr.hpp b/src/routines/level2/xsyr.hpp new file mode 100644 index 00000000..20393454 --- /dev/null +++ b/src/routines/level2/xsyr.hpp @@ -0,0 +1,45 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xsyr routine. The precision is implemented using a template argument. 
+// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XSYR_H_ +#define CLBLAST_ROUTINES_XSYR_H_ + +#include "routines/level2/xher.hpp" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template <typename T> +class Xsyr: public Xher<T,T> { + public: + + // Uses the regular Xher routine + using Xher<T,T>::DoHer; + + // Constructor + Xsyr(Queue &queue, EventPointer event, const std::string &name = "SYR"); + + // Templated-precision implementation of the routine + StatusCode DoSyr(const Layout layout, const Triangle triangle, + const size_t n, + const T alpha, + const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc, + const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld); +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XSYR_H_ +#endif diff --git a/src/routines/level2/xsyr2.cc b/src/routines/level2/xsyr2.cc index e6dfd158..6f43b219 100644 --- a/src/routines/level2/xsyr2.cc +++ b/src/routines/level2/xsyr2.cc @@ -11,7 +11,7 @@ // // ================================================================================================= -#include "internal/routines/level2/xsyr2.h" +#include "routines/level2/xsyr2.hpp" #include <string> diff --git a/src/routines/level2/xsyr2.hpp b/src/routines/level2/xsyr2.hpp new file mode 100644 index 00000000..1a8dcbe8 --- /dev/null +++ b/src/routines/level2/xsyr2.hpp @@ -0,0 +1,46 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. 
This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xsyr2 routine. The precision is implemented using a template argument. +// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XSYR2_H_ +#define CLBLAST_ROUTINES_XSYR2_H_ + +#include "routines/level2/xher2.hpp" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template <typename T> +class Xsyr2: public Xher2<T> { + public: + + // Uses the regular Xher2 routine + using Xher2<T>::DoHer2; + + // Constructor + Xsyr2(Queue &queue, EventPointer event, const std::string &name = "SYR2"); + + // Templated-precision implementation of the routine + StatusCode DoSyr2(const Layout layout, const Triangle triangle, + const size_t n, + const T alpha, + const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc, + const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc, + const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld); +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XSYR2_H_ +#endif diff --git a/src/routines/level2/xtbmv.cc b/src/routines/level2/xtbmv.cc index 86e28dfb..e315c544 100644 --- a/src/routines/level2/xtbmv.cc +++ b/src/routines/level2/xtbmv.cc @@ -11,7 +11,7 @@ // // ================================================================================================= -#include "internal/routines/level2/xtbmv.h" +#include "routines/level2/xtbmv.hpp" #include <string> #include <vector> diff --git a/src/routines/level2/xtbmv.hpp b/src/routines/level2/xtbmv.hpp new file mode 100644 
index 00000000..389e9705 --- /dev/null +++ b/src/routines/level2/xtbmv.hpp @@ -0,0 +1,49 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xtbmv routine. It is based on the generalized mat-vec multiplication +// routine (Xgemv). The Xtbmv class inherits from the templated class Xgemv, allowing it to call the +// "MatVec" function directly. +// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XTBMV_H_ +#define CLBLAST_ROUTINES_XTBMV_H_ + +#include "routines/level2/xgemv.hpp" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template <typename T> +class Xtbmv: public Xgemv<T> { + public: + + // Uses the generic matrix-vector routine + using Xgemv<T>::queue_; + using Xgemv<T>::context_; + using Xgemv<T>::MatVec; + + // Constructor + Xtbmv(Queue &queue, EventPointer event, const std::string &name = "TBMV"); + + // Templated-precision implementation of the routine + StatusCode DoTbmv(const Layout layout, const Triangle triangle, + const Transpose a_transpose, const Diagonal diagonal, + const size_t n, const size_t k, + const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld, + const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc); +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XTBMV_H_ +#endif diff --git a/src/routines/level2/xtpmv.cc 
b/src/routines/level2/xtpmv.cc index 72445547..46811089 100644 --- a/src/routines/level2/xtpmv.cc +++ b/src/routines/level2/xtpmv.cc @@ -11,7 +11,7 @@ // // ================================================================================================= -#include "internal/routines/level2/xtpmv.h" +#include "routines/level2/xtpmv.hpp" #include <string> #include <vector> diff --git a/src/routines/level2/xtpmv.hpp b/src/routines/level2/xtpmv.hpp new file mode 100644 index 00000000..0e8cf1d2 --- /dev/null +++ b/src/routines/level2/xtpmv.hpp @@ -0,0 +1,49 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xtpmv routine. It is based on the generalized mat-vec multiplication +// routine (Xgemv). The Xtpmv class inherits from the templated class Xgemv, allowing it to call the +// "MatVec" function directly. 
+// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XTPMV_H_ +#define CLBLAST_ROUTINES_XTPMV_H_ + +#include "routines/level2/xgemv.hpp" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template <typename T> +class Xtpmv: public Xgemv<T> { + public: + + // Uses the generic matrix-vector routine + using Xgemv<T>::queue_; + using Xgemv<T>::context_; + using Xgemv<T>::MatVec; + + // Constructor + Xtpmv(Queue &queue, EventPointer event, const std::string &name = "TPMV"); + + // Templated-precision implementation of the routine + StatusCode DoTpmv(const Layout layout, const Triangle triangle, + const Transpose a_transpose, const Diagonal diagonal, + const size_t n, + const Buffer<T> &ap_buffer, const size_t ap_offset, + const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc); +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XTPMV_H_ +#endif diff --git a/src/routines/level2/xtrmv.cc b/src/routines/level2/xtrmv.cc index df6f85a3..d2f24252 100644 --- a/src/routines/level2/xtrmv.cc +++ b/src/routines/level2/xtrmv.cc @@ -11,7 +11,7 @@ // // ================================================================================================= -#include "internal/routines/level2/xtrmv.h" +#include "routines/level2/xtrmv.hpp" #include <string> #include <vector> diff --git a/src/routines/level2/xtrmv.hpp b/src/routines/level2/xtrmv.hpp new file mode 100644 index 00000000..07dd7841 --- /dev/null +++ b/src/routines/level2/xtrmv.hpp @@ -0,0 +1,49 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. 
This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xtrmv routine. It is based on the generalized mat-vec multiplication +// routine (Xgemv). The Xtrmv class inherits from the templated class Xgemv, allowing it to call the +// "MatVec" function directly. +// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XTRMV_H_ +#define CLBLAST_ROUTINES_XTRMV_H_ + +#include "routines/level2/xgemv.hpp" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template <typename T> +class Xtrmv: public Xgemv<T> { + public: + + // Uses the generic matrix-vector routine + using Xgemv<T>::queue_; + using Xgemv<T>::context_; + using Xgemv<T>::MatVec; + + // Constructor + Xtrmv(Queue &queue, EventPointer event, const std::string &name = "TRMV"); + + // Templated-precision implementation of the routine + StatusCode DoTrmv(const Layout layout, const Triangle triangle, + const Transpose a_transpose, const Diagonal diagonal, + const size_t n, + const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld, + const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc); +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XTRMV_H_ +#endif diff --git a/src/routines/level3/xgemm.cc b/src/routines/level3/xgemm.cc index 8386ad09..9ea5559c 100644 --- a/src/routines/level3/xgemm.cc +++ b/src/routines/level3/xgemm.cc @@ -11,7 +11,7 @@ // // ================================================================================================= -#include "internal/routines/level3/xgemm.h" +#include 
"routines/level3/xgemm.hpp" #include <string> #include <vector> diff --git a/src/routines/level3/xgemm.hpp b/src/routines/level3/xgemm.hpp new file mode 100644 index 00000000..71723d78 --- /dev/null +++ b/src/routines/level3/xgemm.hpp @@ -0,0 +1,48 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xgemm routine. The precision is implemented using a template argument. +// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XGEMM_H_ +#define CLBLAST_ROUTINES_XGEMM_H_ + +#include "routine.hpp" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template <typename T> +class Xgemm: public Routine { + public: + + // Constructor + Xgemm(Queue &queue, EventPointer event, const std::string &name = "GEMM"); + + // Templated-precision implementation of the routine + StatusCode DoGemm(const Layout layout, const Transpose a_transpose, const Transpose b_transpose, + const size_t m, const size_t n, const size_t k, + const T alpha, + const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld, + const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld, + const T beta, + const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld); + + protected: + // Static variable to get the precision + const static Precision precision_; +}; + +// ================================================================================================= +} // namespace clblast + +// 
CLBLAST_ROUTINES_XGEMM_H_ +#endif diff --git a/src/routines/level3/xhemm.cc b/src/routines/level3/xhemm.cc index 8120c09c..9813503e 100644 --- a/src/routines/level3/xhemm.cc +++ b/src/routines/level3/xhemm.cc @@ -11,7 +11,7 @@ // // ================================================================================================= -#include "internal/routines/level3/xhemm.h" +#include "routines/level3/xhemm.hpp" #include <string> #include <vector> diff --git a/src/routines/level3/xhemm.hpp b/src/routines/level3/xhemm.hpp new file mode 100644 index 00000000..d79b42a1 --- /dev/null +++ b/src/routines/level3/xhemm.hpp @@ -0,0 +1,54 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xhemm routine. It is based on the generalized matrix multiplication +// routine (Xgemm). The implementation is very similar to the Xsymm routine. 
+// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XHEMM_H_ +#define CLBLAST_ROUTINES_XHEMM_H_ + +#include "routines/level3/xgemm.hpp" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template <typename T> +class Xhemm: public Xgemm<T> { + public: + + // Uses methods and variables of the regular Xgemm routine + using Xgemm<T>::precision_; + using Xgemm<T>::routine_name_; + using Xgemm<T>::queue_; + using Xgemm<T>::context_; + using Xgemm<T>::device_; + using Xgemm<T>::db_; + using Xgemm<T>::DoGemm; + + // Constructor + Xhemm(Queue &queue, EventPointer event, const std::string &name = "HEMM"); + + // Templated-precision implementation of the routine + StatusCode DoHemm(const Layout layout, const Side side, const Triangle triangle, + const size_t m, const size_t n, + const T alpha, + const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld, + const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld, + const T beta, + const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld); +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XHEMM_H_ +#endif diff --git a/src/routines/level3/xher2k.cc b/src/routines/level3/xher2k.cc index bd0f83dd..bd7a053e 100644 --- a/src/routines/level3/xher2k.cc +++ b/src/routines/level3/xher2k.cc @@ -11,7 +11,7 @@ // // ================================================================================================= -#include "internal/routines/level3/xher2k.h" +#include "routines/level3/xher2k.hpp" #include <string> #include <vector> diff --git a/src/routines/level3/xher2k.hpp b/src/routines/level3/xher2k.hpp new file mode 100644 index 00000000..23996219 --- /dev/null +++ 
b/src/routines/level3/xher2k.hpp @@ -0,0 +1,46 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xher2k routine. The precision is implemented using the template argument +// 'T', whereas the alpha/beta arguments are of type 'U'. The implementation is very similar to the +// Xsyr2k routine. +// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XHER2K_H_ +#define CLBLAST_ROUTINES_XHER2K_H_ + +#include "routine.hpp" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template <typename T, typename U> +class Xher2k: public Routine { + public: + + // Constructor + Xher2k(Queue &queue, EventPointer event, const std::string &name = "HER2K"); + + // Templated-precision implementation of the routine + StatusCode DoHer2k(const Layout layout, const Triangle triangle, const Transpose ab_transpose, + const size_t n, const size_t k, + const T alpha, + const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld, + const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld, + const U beta, + const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld); +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XHER2K_H_ +#endif diff --git a/src/routines/level3/xherk.cc b/src/routines/level3/xherk.cc index 6155734a..6ef7f21f 100644 --- a/src/routines/level3/xherk.cc 
+++ b/src/routines/level3/xherk.cc @@ -11,7 +11,7 @@ // // ================================================================================================= -#include "internal/routines/level3/xherk.h" +#include "routines/level3/xherk.hpp" #include <string> #include <vector> diff --git a/src/routines/level3/xherk.hpp b/src/routines/level3/xherk.hpp new file mode 100644 index 00000000..3f156a1b --- /dev/null +++ b/src/routines/level3/xherk.hpp @@ -0,0 +1,45 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xherk routine. The precision is implemented using the template argument +// 'T', whereas the alpha/beta arguments are of type 'U'. The implementation is very similar to the +// Xsyrk routine. 
+// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XHERK_H_ +#define CLBLAST_ROUTINES_XHERK_H_ + +#include "routine.hpp" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template <typename T, typename U> +class Xherk: public Routine { + public: + + // Constructor + Xherk(Queue &queue, EventPointer event, const std::string &name = "HERK"); + + // Templated-precision implementation of the routine + StatusCode DoHerk(const Layout layout, const Triangle triangle, const Transpose a_transpose, + const size_t n, const size_t k, + const U alpha, + const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld, + const U beta, + const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld); +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XHERK_H_ +#endif diff --git a/src/routines/level3/xsymm.cc b/src/routines/level3/xsymm.cc index c5e56617..04e4b718 100644 --- a/src/routines/level3/xsymm.cc +++ b/src/routines/level3/xsymm.cc @@ -11,7 +11,7 @@ // // ================================================================================================= -#include "internal/routines/level3/xsymm.h" +#include "routines/level3/xsymm.hpp" #include <string> #include <vector> diff --git a/src/routines/level3/xsymm.hpp b/src/routines/level3/xsymm.hpp new file mode 100644 index 00000000..754dd7a0 --- /dev/null +++ b/src/routines/level3/xsymm.hpp @@ -0,0 +1,56 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. 
This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xsymm routine. It is based on the generalized matrix multiplication +// routine (Xgemm). The Xsymm class inherits from the templated class Xgemm, allowing it to call the +// "DoGemm" function directly. The "DoSymm" function first preprocesses the symmetric matrix by +// transforming it into a general matrix, and then calls the regular GEMM code. +// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XSYMM_H_ +#define CLBLAST_ROUTINES_XSYMM_H_ + +#include "routines/level3/xgemm.hpp" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template <typename T> +class Xsymm: public Xgemm<T> { + public: + + // Uses methods and variables of the regular Xgemm routine + using Xgemm<T>::precision_; + using Xgemm<T>::routine_name_; + using Xgemm<T>::queue_; + using Xgemm<T>::context_; + using Xgemm<T>::device_; + using Xgemm<T>::db_; + using Xgemm<T>::DoGemm; + + // Constructor + Xsymm(Queue &queue, EventPointer event, const std::string &name = "SYMM"); + + // Templated-precision implementation of the routine + StatusCode DoSymm(const Layout layout, const Side side, const Triangle triangle, + const size_t m, const size_t n, + const T alpha, + const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld, + const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld, + const T beta, + const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld); +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XSYMM_H_ +#endif 
diff --git a/src/routines/level3/xsyr2k.cc b/src/routines/level3/xsyr2k.cc index f9655889..424d4d2d 100644 --- a/src/routines/level3/xsyr2k.cc +++ b/src/routines/level3/xsyr2k.cc @@ -11,7 +11,7 @@ // // ================================================================================================= -#include "internal/routines/level3/xsyr2k.h" +#include "routines/level3/xsyr2k.hpp" #include <string> #include <vector> diff --git a/src/routines/level3/xsyr2k.hpp b/src/routines/level3/xsyr2k.hpp new file mode 100644 index 00000000..56185653 --- /dev/null +++ b/src/routines/level3/xsyr2k.hpp @@ -0,0 +1,46 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xsyr2k routine. The precision is implemented using a template argument. +// The implementation is very similar to Xsyrk (see header for details), except for the fact that +// the main XgemmUpper/XgemmLower kernel is called twice: C = AB^T + C and C = BA^T + C. 
+// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XSYR2K_H_ +#define CLBLAST_ROUTINES_XSYR2K_H_ + +#include "routine.hpp" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template <typename T> +class Xsyr2k: public Routine { + public: + + // Constructor + Xsyr2k(Queue &queue, EventPointer event, const std::string &name = "SYR2K"); + + // Templated-precision implementation of the routine + StatusCode DoSyr2k(const Layout layout, const Triangle triangle, const Transpose ab_transpose, + const size_t n, const size_t k, + const T alpha, + const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld, + const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld, + const T beta, + const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld); +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XSYR2K_H_ +#endif diff --git a/src/routines/level3/xsyrk.cc b/src/routines/level3/xsyrk.cc index bceb6afd..f56c232b 100644 --- a/src/routines/level3/xsyrk.cc +++ b/src/routines/level3/xsyrk.cc @@ -11,7 +11,7 @@ // // ================================================================================================= -#include "internal/routines/level3/xsyrk.h" +#include "routines/level3/xsyrk.hpp" #include <string> #include <vector> diff --git a/src/routines/level3/xsyrk.hpp b/src/routines/level3/xsyrk.hpp new file mode 100644 index 00000000..7c075c26 --- /dev/null +++ b/src/routines/level3/xsyrk.hpp @@ -0,0 +1,47 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. 
This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xsyrk routine. The precision is implemented using a template argument. +// The implementation is based on the regular Xgemm routine and kernel, but with two main changes: +// 1) The final unpad(transpose) kernel updates only the upper/lower triangular part. +// 2) The main Xgemm kernel masks workgroups not contributing to useful data. This is only for +// performance reasons, as the actual masking is done later (see the first point). +// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XSYRK_H_ +#define CLBLAST_ROUTINES_XSYRK_H_ + +#include "routine.hpp" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template <typename T> +class Xsyrk: public Routine { + public: + + // Constructor + Xsyrk(Queue &queue, EventPointer event, const std::string &name = "SYRK"); + + // Templated-precision implementation of the routine + StatusCode DoSyrk(const Layout layout, const Triangle triangle, const Transpose a_transpose, + const size_t n, const size_t k, + const T alpha, + const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld, + const T beta, + const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld); +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XSYRK_H_ +#endif diff --git a/src/routines/level3/xtrmm.cc b/src/routines/level3/xtrmm.cc index 92dda9fb..74a82822 100644 --- a/src/routines/level3/xtrmm.cc +++ b/src/routines/level3/xtrmm.cc @@ -11,7 +11,7 @@ // // 
================================================================================================= -#include "internal/routines/level3/xtrmm.h" +#include "routines/level3/xtrmm.hpp" #include <string> #include <vector> diff --git a/src/routines/level3/xtrmm.hpp b/src/routines/level3/xtrmm.hpp new file mode 100644 index 00000000..bb435592 --- /dev/null +++ b/src/routines/level3/xtrmm.hpp @@ -0,0 +1,54 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xtrmm routine. The implementation is based on first transforming the +// upper/lower unit/non-unit triangular matrix into a regular matrix and then calling the GEMM +// routine. Therefore, this class inherits from the Xgemm class. 
+// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XTRMM_H_ +#define CLBLAST_ROUTINES_XTRMM_H_ + +#include "routines/level3/xgemm.hpp" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template <typename T> +class Xtrmm: public Xgemm<T> { + public: + + // Uses methods and variables of the regular Xgemm routine + using Xgemm<T>::precision_; + using Xgemm<T>::routine_name_; + using Xgemm<T>::queue_; + using Xgemm<T>::context_; + using Xgemm<T>::device_; + using Xgemm<T>::db_; + using Xgemm<T>::DoGemm; + + // Constructor + Xtrmm(Queue &queue, EventPointer event, const std::string &name = "TRMM"); + + // Templated-precision implementation of the routine + StatusCode DoTrmm(const Layout layout, const Side side, const Triangle triangle, + const Transpose a_transpose, const Diagonal diagonal, + const size_t m, const size_t n, + const T alpha, + const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld, + const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld); +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XTRMM_H_ +#endif diff --git a/src/routines/levelx/xomatcopy.cc b/src/routines/levelx/xomatcopy.cc index 6e4bddb2..e8593301 100644 --- a/src/routines/levelx/xomatcopy.cc +++ b/src/routines/levelx/xomatcopy.cc @@ -11,7 +11,7 @@ // // ================================================================================================= -#include "internal/routines/levelx/xomatcopy.h" +#include "routines/levelx/xomatcopy.hpp" #include <string> #include <vector> diff --git a/src/routines/levelx/xomatcopy.hpp b/src/routines/levelx/xomatcopy.hpp new file mode 100644 index 00000000..0e580230 --- /dev/null +++ 
b/src/routines/levelx/xomatcopy.hpp @@ -0,0 +1,41 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xomatcopy routine. The precision is implemented using a template argument. +// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XOMATCOPY_H_ +#define CLBLAST_ROUTINES_XOMATCOPY_H_ + +#include "routine.hpp" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template <typename T> +class Xomatcopy: public Routine { + public: + + // Constructor + Xomatcopy(Queue &queue, EventPointer event, const std::string &name = "OMATCOPY"); + + // Templated-precision implementation of the routine + StatusCode DoOmatcopy(const Layout layout, const Transpose a_transpose, + const size_t m, const size_t n, const T alpha, + const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld, + const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld); +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XOMATCOPY_H_ +#endif |