diff options
author | CNugteren <web@cedricnugteren.nl> | 2015-07-27 07:18:06 +0200 |
---|---|---|
committer | CNugteren <web@cedricnugteren.nl> | 2015-07-27 07:18:06 +0200 |
commit | f7199b831f847340f0921ef2140a4e64809db037 (patch) | |
tree | d725b7e63b0662598ad4be0a4c2457820ded8ed4 /src/routines/level3 | |
parent | b10f4a633c4ffb3bb04d35503396ff94528df4d0 (diff) |
Now using the new Claduc C++11 OpenCL header
Diffstat (limited to 'src/routines/level3')
-rw-r--r-- | src/routines/level3/xgemm.cc | 16 | ||||
-rw-r--r-- | src/routines/level3/xhemm.cc | 10 | ||||
-rw-r--r-- | src/routines/level3/xher2k.cc | 20 | ||||
-rw-r--r-- | src/routines/level3/xherk.cc | 14 | ||||
-rw-r--r-- | src/routines/level3/xsymm.cc | 10 | ||||
-rw-r--r-- | src/routines/level3/xsyr2k.cc | 16 | ||||
-rw-r--r-- | src/routines/level3/xsyrk.cc | 12 | ||||
-rw-r--r-- | src/routines/level3/xtrmm.cc | 8 |
8 files changed, 53 insertions, 53 deletions
diff --git a/src/routines/level3/xgemm.cc b/src/routines/level3/xgemm.cc index 85524891..525a82e6 100644 --- a/src/routines/level3/xgemm.cc +++ b/src/routines/level3/xgemm.cc @@ -29,8 +29,8 @@ template <> const Precision Xgemm<double2>::precision_ = Precision::kComplexDoub // Constructor: forwards to base class constructor template <typename T> -Xgemm<T>::Xgemm(CommandQueue &queue, Event &event): - Routine(queue, event, "GEMM", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) { +Xgemm<T>::Xgemm(Queue &queue, Event &event): + Routine<T>(queue, event, "GEMM", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) { source_string_ = #include "../../kernels/copy.opencl" #include "../../kernels/pad.opencl" @@ -48,10 +48,10 @@ StatusCode Xgemm<T>::DoGemm(const Layout layout, const Transpose a_transpose, const Transpose b_transpose, const size_t m, const size_t n, const size_t k, const T alpha, - const Buffer &a_buffer, const size_t a_offset, const size_t a_ld, - const Buffer &b_buffer, const size_t b_offset, const size_t b_ld, + const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld, + const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld, const T beta, - const Buffer &c_buffer, const size_t c_offset, const size_t c_ld) { + const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld) { // Makes sure all dimensions are larger than zero if ((m == 0) || (n == 0) || (k == 0)) { return StatusCode::kInvalidDimension; } @@ -117,9 +117,9 @@ StatusCode Xgemm<T>::DoGemm(const Layout layout, c_do_transpose == false; // Creates the temporary matrices - auto a_temp = (a_no_temp) ? a_buffer : Buffer(context_, CL_MEM_READ_WRITE, k_ceiled*m_ceiled*sizeof(T)); - auto b_temp = (b_no_temp) ? b_buffer : Buffer(context_, CL_MEM_READ_WRITE, k_ceiled*n_ceiled*sizeof(T)); - auto c_temp = (c_no_temp) ? c_buffer : Buffer(context_, CL_MEM_READ_WRITE, m_ceiled*n_ceiled*sizeof(T)); + auto a_temp = (a_no_temp) ? a_buffer : Buffer<T>(context_, k_ceiled*m_ceiled); + auto b_temp = (b_no_temp) ? b_buffer : Buffer<T>(context_, k_ceiled*n_ceiled); + auto c_temp = (c_no_temp) ? c_buffer : Buffer<T>(context_, m_ceiled*n_ceiled); // Runs the pre-processing kernel for matrix A. This transposes the matrix, but also pads zeros // to fill it up until it reaches a certain multiple of size (kernel parameter dependent). In diff --git a/src/routines/level3/xhemm.cc b/src/routines/level3/xhemm.cc index bc257c44..a1c0c7c1 100644 --- a/src/routines/level3/xhemm.cc +++ b/src/routines/level3/xhemm.cc @@ -21,7 +21,7 @@ namespace clblast { // Constructor: forwards to base class constructor template <typename T> -Xhemm<T>::Xhemm(CommandQueue &queue, Event &event): +Xhemm<T>::Xhemm(Queue &queue, Event &event): Xgemm<T>(queue, event) { } @@ -32,10 +32,10 @@ template <typename T> StatusCode Xhemm<T>::DoHemm(const Layout layout, const Side side, const Triangle triangle, const size_t m, const size_t n, const T alpha, - const Buffer &a_buffer, const size_t a_offset, const size_t a_ld, - const Buffer &b_buffer, const size_t b_offset, const size_t b_ld, + const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld, + const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld, const T beta, - const Buffer &c_buffer, const size_t c_offset, const size_t c_ld) { + const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld) { // Makes sure all dimensions are larger than zero if ((m == 0) || (n == 0) ) { return StatusCode::kInvalidDimension; } @@ -56,7 +56,7 @@ StatusCode Xhemm<T>::DoHemm(const Layout layout, const Side side, const Triangle // Temporary buffer for a copy of the hermitian matrix try { - auto temp_herm = Buffer(context_, CL_MEM_READ_WRITE, k*k*sizeof(T)); + auto temp_herm = Buffer<T>(context_, k*k); // Creates a general matrix from the hermitian matrix to be able to run the regular Xgemm // routine afterwards diff --git a/src/routines/level3/xher2k.cc b/src/routines/level3/xher2k.cc index fa42733f..29b2f733 100644 --- a/src/routines/level3/xher2k.cc +++ b/src/routines/level3/xher2k.cc @@ -27,8 +27,8 @@ template <> const Precision Xher2k<double2,double>::precision_ = Precision::kCom // Constructor: forwards to base class constructor template <typename T, typename U> -Xher2k<T,U>::Xher2k(CommandQueue &queue, Event &event): - Routine(queue, event, "HER2K", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) { +Xher2k<T,U>::Xher2k(Queue &queue, Event &event): + Routine<T>(queue, event, "HER2K", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) { source_string_ = #include "../../kernels/copy.opencl" #include "../../kernels/pad.opencl" @@ -45,10 +45,10 @@ template <typename T, typename U> StatusCode Xher2k<T,U>::DoHer2k(const Layout layout, const Triangle triangle, const Transpose ab_transpose, const size_t n, const size_t k, const T alpha, - const Buffer &a_buffer, const size_t a_offset, const size_t a_ld, - const Buffer &b_buffer, const size_t b_offset, const size_t b_ld, + const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld, + const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld, const U beta, - const Buffer &c_buffer, const size_t c_offset, const size_t c_ld) { + const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld) { // Makes sure all dimensions are larger than zero if ((n == 0) || (k == 0) ) { return StatusCode::kInvalidDimension; } @@ -105,11 +105,11 @@ StatusCode Xher2k<T,U>::DoHer2k(const Layout layout, const Triangle triangle, co ab_rotated == false && ab_conjugate == true; // Creates the temporary matrices - auto a1_temp = (a1_no_temp) ? a_buffer : Buffer(context_, CL_MEM_READ_WRITE, k_ceiled*n_ceiled*sizeof(T)); - auto a2_temp = (a2_no_temp) ? a_buffer : Buffer(context_, CL_MEM_READ_WRITE, k_ceiled*n_ceiled*sizeof(T)); - auto b1_temp = (b1_no_temp) ? b_buffer : Buffer(context_, CL_MEM_READ_WRITE, k_ceiled*n_ceiled*sizeof(T)); - auto b2_temp = (b2_no_temp) ? b_buffer : Buffer(context_, CL_MEM_READ_WRITE, k_ceiled*n_ceiled*sizeof(T)); - auto c_temp = Buffer(context_, CL_MEM_READ_WRITE, n_ceiled*n_ceiled*sizeof(T)); + auto a1_temp = (a1_no_temp) ? a_buffer : Buffer<T>(context_, k_ceiled*n_ceiled); + auto a2_temp = (a2_no_temp) ? a_buffer : Buffer<T>(context_, k_ceiled*n_ceiled); + auto b1_temp = (b1_no_temp) ? b_buffer : Buffer<T>(context_, k_ceiled*n_ceiled); + auto b2_temp = (b2_no_temp) ? b_buffer : Buffer<T>(context_, k_ceiled*n_ceiled); + auto c_temp = Buffer<T>(context_, n_ceiled*n_ceiled); // Runs the pre-processing kernels. This transposes the matrices A and B, but also pads zeros to // to fill it up until it reaches a certain multiple of size (kernel parameter dependent). In diff --git a/src/routines/level3/xherk.cc b/src/routines/level3/xherk.cc index ae350050..5174e9ab 100644 --- a/src/routines/level3/xherk.cc +++ b/src/routines/level3/xherk.cc @@ -27,8 +27,8 @@ template <> const Precision Xherk<double2,double>::precision_ = Precision::kComp // Constructor: forwards to base class constructor template <typename T, typename U> -Xherk<T,U>::Xherk(CommandQueue &queue, Event &event): - Routine(queue, event, "HERK", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) { +Xherk<T,U>::Xherk(Queue &queue, Event &event): + Routine<T>(queue, event, "HERK", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) { source_string_ = #include "../../kernels/copy.opencl" #include "../../kernels/pad.opencl" @@ -45,9 +45,9 @@ template <typename T, typename U> StatusCode Xherk<T,U>::DoHerk(const Layout layout, const Triangle triangle, const Transpose a_transpose, const size_t n, const size_t k, const U alpha, - const Buffer &a_buffer, const size_t a_offset, const size_t a_ld, + const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld, const U beta, - const Buffer &c_buffer, const size_t c_offset, const size_t c_ld) { + const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld) { // Makes sure all dimensions are larger than zero if ((n == 0) || (k == 0) ) { return StatusCode::kInvalidDimension; } @@ -98,9 +98,9 @@ StatusCode Xherk<T,U>::DoHerk(const Layout layout, const Triangle triangle, cons a_rotated == false && b_conjugate == false; // Creates the temporary matrices - auto a_temp = (a_no_temp) ? a_buffer : Buffer(context_, CL_MEM_READ_WRITE, k_ceiled*n_ceiled*sizeof(T)); - auto b_temp = (b_no_temp) ? a_buffer : Buffer(context_, CL_MEM_READ_WRITE, k_ceiled*n_ceiled*sizeof(T)); - auto c_temp = Buffer(context_, CL_MEM_READ_WRITE, n_ceiled*n_ceiled*sizeof(T)); + auto a_temp = (a_no_temp) ? a_buffer : Buffer<T>(context_, k_ceiled*n_ceiled); + auto b_temp = (b_no_temp) ? a_buffer : Buffer<T>(context_, k_ceiled*n_ceiled); + auto c_temp = Buffer<T>(context_, n_ceiled*n_ceiled); // Runs the pre-processing kernel for matrix A. This transposes the matrix, but also pads zeros // to fill it up until it reaches a certain multiple of size (kernel parameter dependent). In diff --git a/src/routines/level3/xsymm.cc b/src/routines/level3/xsymm.cc index 1d17f0eb..37c08d3b 100644 --- a/src/routines/level3/xsymm.cc +++ b/src/routines/level3/xsymm.cc @@ -21,7 +21,7 @@ namespace clblast { // Constructor: forwards to base class constructor template <typename T> -Xsymm<T>::Xsymm(CommandQueue &queue, Event &event): +Xsymm<T>::Xsymm(Queue &queue, Event &event): Xgemm<T>(queue, event) { } @@ -32,10 +32,10 @@ template <typename T> StatusCode Xsymm<T>::DoSymm(const Layout layout, const Side side, const Triangle triangle, const size_t m, const size_t n, const T alpha, - const Buffer &a_buffer, const size_t a_offset, const size_t a_ld, - const Buffer &b_buffer, const size_t b_offset, const size_t b_ld, + const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld, + const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld, const T beta, - const Buffer &c_buffer, const size_t c_offset, const size_t c_ld) { + const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld) { // Makes sure all dimensions are larger than zero if ((m == 0) || (n == 0) ) { return StatusCode::kInvalidDimension; } @@ -56,7 +56,7 @@ StatusCode Xsymm<T>::DoSymm(const Layout layout, const Side side, const Triangle // Temporary buffer for a copy of the symmetric matrix try { - auto temp_symm = Buffer(context_, CL_MEM_READ_WRITE, k*k*sizeof(T)); + auto temp_symm = Buffer<T>(context_, k*k); // Creates a general matrix from the symmetric matrix to be able to run the regular Xgemm // routine afterwards diff --git a/src/routines/level3/xsyr2k.cc b/src/routines/level3/xsyr2k.cc index 7ab3430a..b36e7c5e 100644 --- a/src/routines/level3/xsyr2k.cc +++ b/src/routines/level3/xsyr2k.cc @@ -29,8 +29,8 @@ template <> const Precision Xsyr2k<double2>::precision_ = Precision::kComplexDou // Constructor: forwards to base class constructor template <typename T> -Xsyr2k<T>::Xsyr2k(CommandQueue &queue, Event &event): - Routine(queue, event, "SYR2K", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) { +Xsyr2k<T>::Xsyr2k(Queue &queue, Event &event): + Routine<T>(queue, event, "SYR2K", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) { source_string_ = #include "../../kernels/copy.opencl" #include "../../kernels/pad.opencl" @@ -47,10 +47,10 @@ template <typename T> StatusCode Xsyr2k<T>::DoSyr2k(const Layout layout, const Triangle triangle, const Transpose ab_transpose, const size_t n, const size_t k, const T alpha, - const Buffer &a_buffer, const size_t a_offset, const size_t a_ld, - const Buffer &b_buffer, const size_t b_offset, const size_t b_ld, + const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld, + const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld, const T beta, - const Buffer &c_buffer, const size_t c_offset, const size_t c_ld) { + const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld) { // Makes sure all dimensions are larger than zero if ((n == 0) || (k == 0) ) { return StatusCode::kInvalidDimension; } @@ -99,9 +99,9 @@ StatusCode Xsyr2k<T>::DoSyr2k(const Layout layout, const Triangle triangle, cons ab_rotated == false; // Creates the temporary matrices - auto a_temp = (a_no_temp) ? a_buffer : Buffer(context_, CL_MEM_READ_WRITE, k_ceiled*n_ceiled*sizeof(T)); - auto b_temp = (b_no_temp) ? b_buffer : Buffer(context_, CL_MEM_READ_WRITE, k_ceiled*n_ceiled*sizeof(T)); - auto c_temp = Buffer(context_, CL_MEM_READ_WRITE, n_ceiled*n_ceiled*sizeof(T)); + auto a_temp = (a_no_temp) ? a_buffer : Buffer<T>(context_, k_ceiled*n_ceiled); + auto b_temp = (b_no_temp) ? b_buffer : Buffer<T>(context_, k_ceiled*n_ceiled); + auto c_temp = Buffer<T>(context_, n_ceiled*n_ceiled); // Runs the pre-processing kernels. This transposes the matrices A and B, but also pads zeros to // to fill it up until it reaches a certain multiple of size (kernel parameter dependent). In diff --git a/src/routines/level3/xsyrk.cc b/src/routines/level3/xsyrk.cc index c6feb5e6..e4668216 100644 --- a/src/routines/level3/xsyrk.cc +++ b/src/routines/level3/xsyrk.cc @@ -29,8 +29,8 @@ template <> const Precision Xsyrk<double2>::precision_ = Precision::kComplexDoub // Constructor: forwards to base class constructor template <typename T> -Xsyrk<T>::Xsyrk(CommandQueue &queue, Event &event): - Routine(queue, event, "SYRK", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) { +Xsyrk<T>::Xsyrk(Queue &queue, Event &event): + Routine<T>(queue, event, "SYRK", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) { source_string_ = #include "../../kernels/copy.opencl" #include "../../kernels/pad.opencl" @@ -47,9 +47,9 @@ template <typename T> StatusCode Xsyrk<T>::DoSyrk(const Layout layout, const Triangle triangle, const Transpose a_transpose, const size_t n, const size_t k, const T alpha, - const Buffer &a_buffer, const size_t a_offset, const size_t a_ld, + const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld, const T beta, - const Buffer &c_buffer, const size_t c_offset, const size_t c_ld) { + const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld) { // Makes sure all dimensions are larger than zero if ((n == 0) || (k == 0) ) { return StatusCode::kInvalidDimension; } @@ -93,8 +93,8 @@ StatusCode Xsyrk<T>::DoSyrk(const Layout layout, const Triangle triangle, const a_rotated == false; // Creates the temporary matrices - auto a_temp = (a_no_temp) ? a_buffer : Buffer(context_, CL_MEM_READ_WRITE, k_ceiled*n_ceiled*sizeof(T)); - auto c_temp = Buffer(context_, CL_MEM_READ_WRITE, n_ceiled*n_ceiled*sizeof(T)); + auto a_temp = (a_no_temp) ? a_buffer : Buffer<T>(context_, k_ceiled*n_ceiled); + auto c_temp = Buffer<T>(context_, n_ceiled*n_ceiled); // Runs the pre-processing kernel for matrix A. This transposes the matrix, but also pads zeros // to fill it up until it reaches a certain multiple of size (kernel parameter dependent). In diff --git a/src/routines/level3/xtrmm.cc b/src/routines/level3/xtrmm.cc index 52f272e3..8be7d950 100644 --- a/src/routines/level3/xtrmm.cc +++ b/src/routines/level3/xtrmm.cc @@ -21,7 +21,7 @@ namespace clblast { // Constructor: forwards to base class constructor template <typename T> -Xtrmm<T>::Xtrmm(CommandQueue &queue, Event &event): +Xtrmm<T>::Xtrmm(Queue &queue, Event &event): Xgemm<T>(queue, event) { } @@ -33,8 +33,8 @@ StatusCode Xtrmm<T>::DoTrmm(const Layout layout, const Side side, const Triangle const Transpose a_transpose, const Diagonal diagonal, const size_t m, const size_t n, const T alpha, - const Buffer &a_buffer, const size_t a_offset, const size_t a_ld, - const Buffer &b_buffer, const size_t b_offset, const size_t b_ld) { + const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld, + const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld) { // Makes sure all dimensions are larger than zero if ((m == 0) || (n == 0)) { return StatusCode::kInvalidDimension; } @@ -58,7 +58,7 @@ StatusCode Xtrmm<T>::DoTrmm(const Layout layout, const Side side, const Triangle // Temporary buffer for a copy of the triangular matrix try { - auto temp_triangular = Buffer(context_, CL_MEM_READ_WRITE, k*k*sizeof(T)); + auto temp_triangular = Buffer<T>(context_, k*k); // Creates a general matrix from the triangular matrix to be able to run the regular Xgemm // routine afterwards |