From f7199b831f847340f0921ef2140a4e64809db037 Mon Sep 17 00:00:00 2001 From: CNugteren Date: Mon, 27 Jul 2015 07:18:06 +0200 Subject: Now using the new Claduc C++11 OpenCL header --- src/routines/level1/xaxpy.cc | 8 ++++---- src/routines/level2/xgemv.cc | 10 +++++----- src/routines/level3/xgemm.cc | 16 ++++++++-------- src/routines/level3/xhemm.cc | 10 +++++----- src/routines/level3/xher2k.cc | 20 ++++++++++---------- src/routines/level3/xherk.cc | 14 +++++++------- src/routines/level3/xsymm.cc | 10 +++++----- src/routines/level3/xsyr2k.cc | 16 ++++++++-------- src/routines/level3/xsyrk.cc | 12 ++++++------ src/routines/level3/xtrmm.cc | 8 ++++---- 10 files changed, 62 insertions(+), 62 deletions(-) (limited to 'src/routines') diff --git a/src/routines/level1/xaxpy.cc b/src/routines/level1/xaxpy.cc index e6b320d9..7646b0e4 100644 --- a/src/routines/level1/xaxpy.cc +++ b/src/routines/level1/xaxpy.cc @@ -29,8 +29,8 @@ template <> const Precision Xaxpy::precision_ = Precision::kComplexDoub // Constructor: forwards to base class constructor template -Xaxpy::Xaxpy(CommandQueue &queue, Event &event): - Routine(queue, event, "AXPY", {"Xaxpy"}, precision_) { +Xaxpy::Xaxpy(Queue &queue, Event &event): + Routine(queue, event, "AXPY", {"Xaxpy"}, precision_) { source_string_ = #include "../../kernels/xaxpy.opencl" ; @@ -41,8 +41,8 @@ Xaxpy::Xaxpy(CommandQueue &queue, Event &event): // The main routine template StatusCode Xaxpy::DoAxpy(const size_t n, const T alpha, - const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, - const Buffer &y_buffer, const size_t y_offset, const size_t y_inc) { + const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, + const Buffer &y_buffer, const size_t y_offset, const size_t y_inc) { // Makes sure all dimensions are larger than zero if (n == 0) { return StatusCode::kInvalidDimension; } diff --git a/src/routines/level2/xgemv.cc b/src/routines/level2/xgemv.cc index a7052af8..75219b63 100644 --- a/src/routines/level2/xgemv.cc +++ b/src/routines/level2/xgemv.cc @@ -29,8 +29,8 @@ template <> const Precision Xgemv::precision_ = Precision::kComplexDoub // Constructor: forwards to base class constructor template -Xgemv::Xgemv(CommandQueue &queue, Event &event): - Routine(queue, event, "GEMV", {"Xgemv"}, precision_) { +Xgemv::Xgemv(Queue &queue, Event &event): + Routine(queue, event, "GEMV", {"Xgemv"}, precision_) { source_string_ = #include "../../kernels/xgemv.opencl" ; @@ -43,10 +43,10 @@ template StatusCode Xgemv::DoGemv(const Layout layout, const Transpose a_transpose, const size_t m, const size_t n, const T alpha, - const Buffer &a_buffer, const size_t a_offset, const size_t a_ld, - const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, + const Buffer &a_buffer, const size_t a_offset, const size_t a_ld, + const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, const T beta, - const Buffer &y_buffer, const size_t y_offset, const size_t y_inc) { + const Buffer &y_buffer, const size_t y_offset, const size_t y_inc) { // Makes sure all dimensions are larger than zero if (m == 0 || n == 0) { return StatusCode::kInvalidDimension; } diff --git a/src/routines/level3/xgemm.cc b/src/routines/level3/xgemm.cc index 85524891..525a82e6 100644 --- a/src/routines/level3/xgemm.cc +++ b/src/routines/level3/xgemm.cc @@ -29,8 +29,8 @@ template <> const Precision Xgemm::precision_ = Precision::kComplexDoub // Constructor: forwards to base class constructor template -Xgemm::Xgemm(CommandQueue &queue, Event &event): - Routine(queue, event, "GEMM", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) { +Xgemm::Xgemm(Queue &queue, Event &event): + Routine(queue, event, "GEMM", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) { source_string_ = #include "../../kernels/copy.opencl" #include "../../kernels/pad.opencl" @@ -48,10 +48,10 @@ StatusCode Xgemm::DoGemm(const Layout layout, const Transpose a_transpose, const Transpose b_transpose, const size_t m, const size_t n, const size_t k, const T alpha, - const Buffer &a_buffer, const size_t a_offset, const size_t a_ld, - const Buffer &b_buffer, const size_t b_offset, const size_t b_ld, + const Buffer &a_buffer, const size_t a_offset, const size_t a_ld, + const Buffer &b_buffer, const size_t b_offset, const size_t b_ld, const T beta, - const Buffer &c_buffer, const size_t c_offset, const size_t c_ld) { + const Buffer &c_buffer, const size_t c_offset, const size_t c_ld) { // Makes sure all dimensions are larger than zero if ((m == 0) || (n == 0) || (k == 0)) { return StatusCode::kInvalidDimension; } @@ -117,9 +117,9 @@ StatusCode Xgemm::DoGemm(const Layout layout, c_do_transpose == false; // Creates the temporary matrices - auto a_temp = (a_no_temp) ? a_buffer : Buffer(context_, CL_MEM_READ_WRITE, k_ceiled*m_ceiled*sizeof(T)); - auto b_temp = (b_no_temp) ? b_buffer : Buffer(context_, CL_MEM_READ_WRITE, k_ceiled*n_ceiled*sizeof(T)); - auto c_temp = (c_no_temp) ? c_buffer : Buffer(context_, CL_MEM_READ_WRITE, m_ceiled*n_ceiled*sizeof(T)); + auto a_temp = (a_no_temp) ? a_buffer : Buffer(context_, k_ceiled*m_ceiled); + auto b_temp = (b_no_temp) ? b_buffer : Buffer(context_, k_ceiled*n_ceiled); + auto c_temp = (c_no_temp) ? c_buffer : Buffer(context_, m_ceiled*n_ceiled); // Runs the pre-processing kernel for matrix A. This transposes the matrix, but also pads zeros // to fill it up until it reaches a certain multiple of size (kernel parameter dependent). In diff --git a/src/routines/level3/xhemm.cc b/src/routines/level3/xhemm.cc index bc257c44..a1c0c7c1 100644 --- a/src/routines/level3/xhemm.cc +++ b/src/routines/level3/xhemm.cc @@ -21,7 +21,7 @@ namespace clblast { // Constructor: forwards to base class constructor template -Xhemm::Xhemm(CommandQueue &queue, Event &event): +Xhemm::Xhemm(Queue &queue, Event &event): Xgemm(queue, event) { } @@ -32,10 +32,10 @@ template StatusCode Xhemm::DoHemm(const Layout layout, const Side side, const Triangle triangle, const size_t m, const size_t n, const T alpha, - const Buffer &a_buffer, const size_t a_offset, const size_t a_ld, - const Buffer &b_buffer, const size_t b_offset, const size_t b_ld, + const Buffer &a_buffer, const size_t a_offset, const size_t a_ld, + const Buffer &b_buffer, const size_t b_offset, const size_t b_ld, const T beta, - const Buffer &c_buffer, const size_t c_offset, const size_t c_ld) { + const Buffer &c_buffer, const size_t c_offset, const size_t c_ld) { // Makes sure all dimensions are larger than zero if ((m == 0) || (n == 0) ) { return StatusCode::kInvalidDimension; } @@ -56,7 +56,7 @@ StatusCode Xhemm::DoHemm(const Layout layout, const Side side, const Triangle // Temporary buffer for a copy of the hermitian matrix try { - auto temp_herm = Buffer(context_, CL_MEM_READ_WRITE, k*k*sizeof(T)); + auto temp_herm = Buffer(context_, k*k); // Creates a general matrix from the hermitian matrix to be able to run the regular Xgemm // routine afterwards diff --git a/src/routines/level3/xher2k.cc b/src/routines/level3/xher2k.cc index fa42733f..29b2f733 100644 --- a/src/routines/level3/xher2k.cc +++ b/src/routines/level3/xher2k.cc @@ -27,8 +27,8 @@ template <> const Precision Xher2k::precision_ = Precision::kCom // Constructor: forwards to base class constructor template -Xher2k::Xher2k(CommandQueue &queue, Event &event): - Routine(queue, event, "HER2K", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) { +Xher2k::Xher2k(Queue &queue, Event &event): + Routine(queue, event, "HER2K", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) { source_string_ = #include "../../kernels/copy.opencl" #include "../../kernels/pad.opencl" @@ -45,10 +45,10 @@ template StatusCode Xher2k::DoHer2k(const Layout layout, const Triangle triangle, const Transpose ab_transpose, const size_t n, const size_t k, const T alpha, - const Buffer &a_buffer, const size_t a_offset, const size_t a_ld, - const Buffer &b_buffer, const size_t b_offset, const size_t b_ld, + const Buffer &a_buffer, const size_t a_offset, const size_t a_ld, + const Buffer &b_buffer, const size_t b_offset, const size_t b_ld, const U beta, - const Buffer &c_buffer, const size_t c_offset, const size_t c_ld) { + const Buffer &c_buffer, const size_t c_offset, const size_t c_ld) { // Makes sure all dimensions are larger than zero if ((n == 0) || (k == 0) ) { return StatusCode::kInvalidDimension; } @@ -105,11 +105,11 @@ StatusCode Xher2k::DoHer2k(const Layout layout, const Triangle triangle, co ab_rotated == false && ab_conjugate == true; // Creates the temporary matrices - auto a1_temp = (a1_no_temp) ? a_buffer : Buffer(context_, CL_MEM_READ_WRITE, k_ceiled*n_ceiled*sizeof(T)); - auto a2_temp = (a2_no_temp) ? a_buffer : Buffer(context_, CL_MEM_READ_WRITE, k_ceiled*n_ceiled*sizeof(T)); - auto b1_temp = (b1_no_temp) ? b_buffer : Buffer(context_, CL_MEM_READ_WRITE, k_ceiled*n_ceiled*sizeof(T)); - auto b2_temp = (b2_no_temp) ? b_buffer : Buffer(context_, CL_MEM_READ_WRITE, k_ceiled*n_ceiled*sizeof(T)); - auto c_temp = Buffer(context_, CL_MEM_READ_WRITE, n_ceiled*n_ceiled*sizeof(T)); + auto a1_temp = (a1_no_temp) ? a_buffer : Buffer(context_, k_ceiled*n_ceiled); + auto a2_temp = (a2_no_temp) ? a_buffer : Buffer(context_, k_ceiled*n_ceiled); + auto b1_temp = (b1_no_temp) ? b_buffer : Buffer(context_, k_ceiled*n_ceiled); + auto b2_temp = (b2_no_temp) ? b_buffer : Buffer(context_, k_ceiled*n_ceiled); + auto c_temp = Buffer(context_, n_ceiled*n_ceiled); // Runs the pre-processing kernels. This transposes the matrices A and B, but also pads zeros to // to fill it up until it reaches a certain multiple of size (kernel parameter dependent). In diff --git a/src/routines/level3/xherk.cc b/src/routines/level3/xherk.cc index ae350050..5174e9ab 100644 --- a/src/routines/level3/xherk.cc +++ b/src/routines/level3/xherk.cc @@ -27,8 +27,8 @@ template <> const Precision Xherk::precision_ = Precision::kComp // Constructor: forwards to base class constructor template -Xherk::Xherk(CommandQueue &queue, Event &event): - Routine(queue, event, "HERK", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) { +Xherk::Xherk(Queue &queue, Event &event): + Routine(queue, event, "HERK", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) { source_string_ = #include "../../kernels/copy.opencl" #include "../../kernels/pad.opencl" @@ -45,9 +45,9 @@ template StatusCode Xherk::DoHerk(const Layout layout, const Triangle triangle, const Transpose a_transpose, const size_t n, const size_t k, const U alpha, - const Buffer &a_buffer, const size_t a_offset, const size_t a_ld, + const Buffer &a_buffer, const size_t a_offset, const size_t a_ld, const U beta, - const Buffer &c_buffer, const size_t c_offset, const size_t c_ld) { + const Buffer &c_buffer, const size_t c_offset, const size_t c_ld) { // Makes sure all dimensions are larger than zero if ((n == 0) || (k == 0) ) { return StatusCode::kInvalidDimension; } @@ -98,9 +98,9 @@ StatusCode Xherk::DoHerk(const Layout layout, const Triangle triangle, cons a_rotated == false && b_conjugate == false; // Creates the temporary matrices - auto a_temp = (a_no_temp) ? a_buffer : Buffer(context_, CL_MEM_READ_WRITE, k_ceiled*n_ceiled*sizeof(T)); - auto b_temp = (b_no_temp) ? a_buffer : Buffer(context_, CL_MEM_READ_WRITE, k_ceiled*n_ceiled*sizeof(T)); - auto c_temp = Buffer(context_, CL_MEM_READ_WRITE, n_ceiled*n_ceiled*sizeof(T)); + auto a_temp = (a_no_temp) ? a_buffer : Buffer(context_, k_ceiled*n_ceiled); + auto b_temp = (b_no_temp) ? a_buffer : Buffer(context_, k_ceiled*n_ceiled); + auto c_temp = Buffer(context_, n_ceiled*n_ceiled); // Runs the pre-processing kernel for matrix A. This transposes the matrix, but also pads zeros // to fill it up until it reaches a certain multiple of size (kernel parameter dependent). In diff --git a/src/routines/level3/xsymm.cc b/src/routines/level3/xsymm.cc index 1d17f0eb..37c08d3b 100644 --- a/src/routines/level3/xsymm.cc +++ b/src/routines/level3/xsymm.cc @@ -21,7 +21,7 @@ namespace clblast { // Constructor: forwards to base class constructor template -Xsymm::Xsymm(CommandQueue &queue, Event &event): +Xsymm::Xsymm(Queue &queue, Event &event): Xgemm(queue, event) { } @@ -32,10 +32,10 @@ template StatusCode Xsymm::DoSymm(const Layout layout, const Side side, const Triangle triangle, const size_t m, const size_t n, const T alpha, - const Buffer &a_buffer, const size_t a_offset, const size_t a_ld, - const Buffer &b_buffer, const size_t b_offset, const size_t b_ld, + const Buffer &a_buffer, const size_t a_offset, const size_t a_ld, + const Buffer &b_buffer, const size_t b_offset, const size_t b_ld, const T beta, - const Buffer &c_buffer, const size_t c_offset, const size_t c_ld) { + const Buffer &c_buffer, const size_t c_offset, const size_t c_ld) { // Makes sure all dimensions are larger than zero if ((m == 0) || (n == 0) ) { return StatusCode::kInvalidDimension; } @@ -56,7 +56,7 @@ StatusCode Xsymm::DoSymm(const Layout layout, const Side side, const Triangle // Temporary buffer for a copy of the symmetric matrix try { - auto temp_symm = Buffer(context_, CL_MEM_READ_WRITE, k*k*sizeof(T)); + auto temp_symm = Buffer(context_, k*k); // Creates a general matrix from the symmetric matrix to be able to run the regular Xgemm // routine afterwards diff --git a/src/routines/level3/xsyr2k.cc b/src/routines/level3/xsyr2k.cc index 7ab3430a..b36e7c5e 100644 --- a/src/routines/level3/xsyr2k.cc +++ b/src/routines/level3/xsyr2k.cc @@ -29,8 +29,8 @@ template <> const Precision Xsyr2k::precision_ = Precision::kComplexDou // Constructor: forwards to base class constructor template -Xsyr2k::Xsyr2k(CommandQueue &queue, Event &event): - Routine(queue, event, "SYR2K", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) { +Xsyr2k::Xsyr2k(Queue &queue, Event &event): + Routine(queue, event, "SYR2K", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) { source_string_ = #include "../../kernels/copy.opencl" #include "../../kernels/pad.opencl" @@ -47,10 +47,10 @@ template StatusCode Xsyr2k::DoSyr2k(const Layout layout, const Triangle triangle, const Transpose ab_transpose, const size_t n, const size_t k, const T alpha, - const Buffer &a_buffer, const size_t a_offset, const size_t a_ld, - const Buffer &b_buffer, const size_t b_offset, const size_t b_ld, + const Buffer &a_buffer, const size_t a_offset, const size_t a_ld, + const Buffer &b_buffer, const size_t b_offset, const size_t b_ld, const T beta, - const Buffer &c_buffer, const size_t c_offset, const size_t c_ld) { + const Buffer &c_buffer, const size_t c_offset, const size_t c_ld) { // Makes sure all dimensions are larger than zero if ((n == 0) || (k == 0) ) { return StatusCode::kInvalidDimension; } @@ -99,9 +99,9 @@ StatusCode Xsyr2k::DoSyr2k(const Layout layout, const Triangle triangle, cons ab_rotated == false; // Creates the temporary matrices - auto a_temp = (a_no_temp) ? a_buffer : Buffer(context_, CL_MEM_READ_WRITE, k_ceiled*n_ceiled*sizeof(T)); - auto b_temp = (b_no_temp) ? b_buffer : Buffer(context_, CL_MEM_READ_WRITE, k_ceiled*n_ceiled*sizeof(T)); - auto c_temp = Buffer(context_, CL_MEM_READ_WRITE, n_ceiled*n_ceiled*sizeof(T)); + auto a_temp = (a_no_temp) ? a_buffer : Buffer(context_, k_ceiled*n_ceiled); + auto b_temp = (b_no_temp) ? b_buffer : Buffer(context_, k_ceiled*n_ceiled); + auto c_temp = Buffer(context_, n_ceiled*n_ceiled); // Runs the pre-processing kernels. This transposes the matrices A and B, but also pads zeros to // to fill it up until it reaches a certain multiple of size (kernel parameter dependent). In diff --git a/src/routines/level3/xsyrk.cc b/src/routines/level3/xsyrk.cc index c6feb5e6..e4668216 100644 --- a/src/routines/level3/xsyrk.cc +++ b/src/routines/level3/xsyrk.cc @@ -29,8 +29,8 @@ template <> const Precision Xsyrk::precision_ = Precision::kComplexDoub // Constructor: forwards to base class constructor template -Xsyrk::Xsyrk(CommandQueue &queue, Event &event): - Routine(queue, event, "SYRK", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) { +Xsyrk::Xsyrk(Queue &queue, Event &event): + Routine(queue, event, "SYRK", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) { source_string_ = #include "../../kernels/copy.opencl" #include "../../kernels/pad.opencl" @@ -47,9 +47,9 @@ template StatusCode Xsyrk::DoSyrk(const Layout layout, const Triangle triangle, const Transpose a_transpose, const size_t n, const size_t k, const T alpha, - const Buffer &a_buffer, const size_t a_offset, const size_t a_ld, + const Buffer &a_buffer, const size_t a_offset, const size_t a_ld, const T beta, - const Buffer &c_buffer, const size_t c_offset, const size_t c_ld) { + const Buffer &c_buffer, const size_t c_offset, const size_t c_ld) { // Makes sure all dimensions are larger than zero if ((n == 0) || (k == 0) ) { return StatusCode::kInvalidDimension; } @@ -93,8 +93,8 @@ StatusCode Xsyrk::DoSyrk(const Layout layout, const Triangle triangle, const a_rotated == false; // Creates the temporary matrices - auto a_temp = (a_no_temp) ? a_buffer : Buffer(context_, CL_MEM_READ_WRITE, k_ceiled*n_ceiled*sizeof(T)); - auto c_temp = Buffer(context_, CL_MEM_READ_WRITE, n_ceiled*n_ceiled*sizeof(T)); + auto a_temp = (a_no_temp) ? a_buffer : Buffer(context_, k_ceiled*n_ceiled); + auto c_temp = Buffer(context_, n_ceiled*n_ceiled); // Runs the pre-processing kernel for matrix A. This transposes the matrix, but also pads zeros // to fill it up until it reaches a certain multiple of size (kernel parameter dependent). In diff --git a/src/routines/level3/xtrmm.cc b/src/routines/level3/xtrmm.cc index 52f272e3..8be7d950 100644 --- a/src/routines/level3/xtrmm.cc +++ b/src/routines/level3/xtrmm.cc @@ -21,7 +21,7 @@ namespace clblast { // Constructor: forwards to base class constructor template -Xtrmm::Xtrmm(CommandQueue &queue, Event &event): +Xtrmm::Xtrmm(Queue &queue, Event &event): Xgemm(queue, event) { } @@ -33,8 +33,8 @@ StatusCode Xtrmm::DoTrmm(const Layout layout, const Side side, const Triangle const Transpose a_transpose, const Diagonal diagonal, const size_t m, const size_t n, const T alpha, - const Buffer &a_buffer, const size_t a_offset, const size_t a_ld, - const Buffer &b_buffer, const size_t b_offset, const size_t b_ld) { + const Buffer &a_buffer, const size_t a_offset, const size_t a_ld, + const Buffer &b_buffer, const size_t b_offset, const size_t b_ld) { // Makes sure all dimensions are larger than zero if ((m == 0) || (n == 0)) { return StatusCode::kInvalidDimension; } @@ -58,7 +58,7 @@ StatusCode Xtrmm::DoTrmm(const Layout layout, const Side side, const Triangle // Temporary buffer for a copy of the triangular matrix try { - auto temp_triangular = Buffer(context_, CL_MEM_READ_WRITE, k*k*sizeof(T)); + auto temp_triangular = Buffer(context_, k*k); // Creates a general matrix from the triangular matrix to be able to run the regular Xgemm // routine afterwards -- cgit v1.2.3