From 3e9a07f00ad62c9d0e27c385249ed2b510acceff Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sun, 22 May 2016 16:59:14 +0200 Subject: Added level-2 half-precision routines HGER/HSYR/HSPR/HSYR2/HSPR2 --- src/clblast.cc | 43 +++++++++++++++++++++--- src/clblast_c.cc | 80 ++++++++++++++++++++++++++++++++++++++++++++ src/routines/level2/xger.cc | 2 ++ src/routines/level2/xher.cc | 3 ++ src/routines/level2/xher2.cc | 2 ++ src/routines/level2/xspr.cc | 1 + src/routines/level2/xspr2.cc | 1 + src/routines/level2/xsyr.cc | 1 + src/routines/level2/xsyr2.cc | 1 + 9 files changed, 129 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/clblast.cc b/src/clblast.cc index e89b41e8..449c7321 100644 --- a/src/clblast.cc +++ b/src/clblast.cc @@ -1207,7 +1207,7 @@ template StatusCode PUBLIC_API Tpsv(const Layout, const Triangle, const cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); -// General rank-1 matrix update: SGER/DGER +// General rank-1 matrix update: SGER/DGER/HGER template StatusCode Ger(const Layout layout, const size_t m, const size_t n, @@ -1241,6 +1241,13 @@ template StatusCode PUBLIC_API Ger(const Layout, const cl_mem, const size_t, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); +template StatusCode PUBLIC_API Ger(const Layout, + const size_t, const size_t, + const half, + const cl_mem, const size_t, const size_t, + const cl_mem, const size_t, const size_t, + cl_mem, const size_t, const size_t, + cl_command_queue*, cl_event*); // General rank-1 complex matrix update: CGERU/ZGERU template @@ -1444,7 +1451,7 @@ template StatusCode PUBLIC_API Hpr2(const Layout, const Triangle, cl_mem, const size_t, cl_command_queue*, cl_event*); -// Symmetric rank-1 matrix update: SSYR/DSYR +// Symmetric rank-1 matrix update: SSYR/DSYR/HSYR template StatusCode Syr(const Layout layout, const Triangle triangle, const size_t n, @@ -1474,8 +1481,14 @@ template StatusCode PUBLIC_API Syr(const Layout, const Triangle, const cl_mem, const size_t, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); +template StatusCode PUBLIC_API Syr(const Layout, const Triangle, + const size_t, + const half, + const cl_mem, const size_t, const size_t, + cl_mem, const size_t, const size_t, + cl_command_queue*, cl_event*); -// Symmetric packed rank-1 matrix update: SSPR/DSPR +// Symmetric packed rank-1 matrix update: SSPR/DSPR/HSPR template StatusCode Spr(const Layout layout, const Triangle triangle, const size_t n, @@ -1505,8 +1518,14 @@ template StatusCode PUBLIC_API Spr(const Layout, const Triangle, const cl_mem, const size_t, const size_t, cl_mem, const size_t, cl_command_queue*, cl_event*); +template StatusCode PUBLIC_API Spr(const Layout, const Triangle, + const size_t, + const half, + const cl_mem, const size_t, const size_t, + cl_mem, const size_t, + cl_command_queue*, cl_event*); -// Symmetric rank-2 matrix update: SSYR2/DSYR2 +// Symmetric rank-2 matrix update: SSYR2/DSYR2/HSYR2 template StatusCode Syr2(const Layout layout, const Triangle triangle, const size_t n, @@ -1540,8 +1559,15 @@ template StatusCode PUBLIC_API Syr2(const Layout, const Triangle, const cl_mem, const size_t, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); +template StatusCode PUBLIC_API Syr2(const Layout, const Triangle, + const size_t, + const half, + const cl_mem, const size_t, const size_t, + const cl_mem, const size_t, const size_t, + cl_mem, const size_t, const size_t, + cl_command_queue*, cl_event*); -// Symmetric packed rank-2 matrix update: SSPR2/DSPR2 +// Symmetric packed rank-2 matrix update: SSPR2/DSPR2/HSPR2 template StatusCode Spr2(const Layout layout, const Triangle triangle, const size_t n, @@ -1575,6 +1601,13 @@ template StatusCode PUBLIC_API Spr2(const Layout, const Triangle, const cl_mem, const size_t, const size_t, cl_mem, const size_t, cl_command_queue*, cl_event*); +template StatusCode PUBLIC_API Spr2(const Layout, const Triangle, + const size_t, + const half, + const cl_mem, const size_t, const size_t, + const cl_mem, const size_t, const size_t, + cl_mem, const size_t, + cl_command_queue*, cl_event*); // ================================================================================================= // BLAS level-3 (matrix-matrix) routines diff --git a/src/clblast_c.cc b/src/clblast_c.cc index f1a81be5..c368a03c 100644 --- a/src/clblast_c.cc +++ b/src/clblast_c.cc @@ -1702,6 +1702,22 @@ StatusCode CLBlastDger(const Layout layout, queue, event); return static_cast(status); } +StatusCode CLBlastHger(const Layout layout, + const size_t m, const size_t n, + const cl_half alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_command_queue* queue, cl_event* event) { + auto status = clblast::Ger(static_cast(layout), + m, n, + alpha, + x_buffer, x_offset, x_inc, + y_buffer, y_offset, y_inc, + a_buffer, a_offset, a_ld, + queue, event); + return static_cast(status); +} // GERU StatusCode CLBlastCgeru(const Layout layout, @@ -1938,6 +1954,21 @@ StatusCode CLBlastDsyr(const Layout layout, const Triangle triangle, queue, event); return static_cast(status); } +StatusCode CLBlastHsyr(const Layout layout, const Triangle triangle, + const size_t n, + const cl_half alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_command_queue* queue, cl_event* event) { + auto status = clblast::Syr(static_cast(layout), + static_cast(triangle), + n, + alpha, + x_buffer, x_offset, x_inc, + a_buffer, a_offset, a_ld, + queue, event); + return static_cast(status); +} // SPR StatusCode CLBlastSspr(const Layout layout, const Triangle triangle, @@ -1970,6 +2001,21 @@ StatusCode CLBlastDspr(const Layout layout, const Triangle triangle, queue, event); return static_cast(status); } +StatusCode CLBlastHspr(const Layout layout, const Triangle triangle, + const size_t n, + const cl_half alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem ap_buffer, const size_t ap_offset, + cl_command_queue* queue, cl_event* event) { + auto status = clblast::Spr(static_cast(layout), + static_cast(triangle), + n, + alpha, + x_buffer, x_offset, x_inc, + ap_buffer, ap_offset, + queue, event); + return static_cast(status); +} // SYR2 StatusCode CLBlastSsyr2(const Layout layout, const Triangle triangle, @@ -2006,6 +2052,23 @@ StatusCode CLBlastDsyr2(const Layout layout, const Triangle triangle, queue, event); return static_cast(status); } +StatusCode CLBlastHsyr2(const Layout layout, const Triangle triangle, + const size_t n, + const cl_half alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_command_queue* queue, cl_event* event) { + auto status = clblast::Syr2(static_cast(layout), + static_cast(triangle), + n, + alpha, + x_buffer, x_offset, x_inc, + y_buffer, y_offset, y_inc, + a_buffer, a_offset, a_ld, + queue, event); + return static_cast(status); +} // SPR2 StatusCode CLBlastSspr2(const Layout layout, const Triangle triangle, @@ -2042,6 +2105,23 @@ StatusCode CLBlastDspr2(const Layout layout, const Triangle triangle, queue, event); return static_cast(status); } +StatusCode CLBlastHspr2(const Layout layout, const Triangle triangle, + const size_t n, + const cl_half alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_mem ap_buffer, const size_t ap_offset, + cl_command_queue* queue, cl_event* event) { + auto status = clblast::Spr2(static_cast(layout), + static_cast(triangle), + n, + alpha, + x_buffer, x_offset, x_inc, + y_buffer, y_offset, y_inc, + ap_buffer, ap_offset, + queue, event); + return static_cast(status); +} // ================================================================================================= // BLAS level-3 (matrix-matrix) routines diff --git a/src/routines/level2/xger.cc b/src/routines/level2/xger.cc index 47d7abe2..d1f98990 100644 --- a/src/routines/level2/xger.cc +++ b/src/routines/level2/xger.cc @@ -20,6 +20,7 @@ namespace clblast { // ================================================================================================= // Specific implementations to get the memory-type based on a template argument +template <> const Precision Xger::precision_ = Precision::kHalf; template <> const Precision Xger::precision_ = Precision::kSingle; template <> const Precision Xger::precision_ = Precision::kDouble; template <> const Precision Xger::precision_ = Precision::kComplexSingle; @@ -104,6 +105,7 @@ StatusCode Xger::DoGer(const Layout layout, // ================================================================================================= // Compiles the templated class +template class Xger; template class Xger; template class Xger; template class Xger; diff --git a/src/routines/level2/xher.cc b/src/routines/level2/xher.cc index 852e3f15..73e7a47d 100644 --- a/src/routines/level2/xher.cc +++ b/src/routines/level2/xher.cc @@ -19,6 +19,7 @@ namespace clblast { // ================================================================================================= // Specific implementations to get the memory-type based on a template argument +template <> const Precision Xher::precision_ = Precision::kHalf; template <> const Precision Xher::precision_ = Precision::kSingle; template <> const Precision Xher::precision_ = Precision::kDouble; template <> const Precision Xher::precision_ = Precision::kComplexSingle; @@ -43,6 +44,7 @@ template <> float2 Xher::GetAlpha(const float alpha) { return floa template <> double2 Xher::GetAlpha(const double alpha) { return double2{alpha, 0.0}; } template <> float Xher::GetAlpha(const float alpha) { return alpha; } template <> double Xher::GetAlpha(const double alpha) { return alpha; } +template <> half Xher::GetAlpha(const half alpha) { return alpha; } // ================================================================================================= @@ -114,6 +116,7 @@ StatusCode Xher::DoHer(const Layout layout, const Triangle triangle, // ================================================================================================= // Compiles the templated class +template class Xher; template class Xher; template class Xher; template class Xher; diff --git a/src/routines/level2/xher2.cc b/src/routines/level2/xher2.cc index 82052187..a73dde52 100644 --- a/src/routines/level2/xher2.cc +++ b/src/routines/level2/xher2.cc @@ -19,6 +19,7 @@ namespace clblast { // ================================================================================================= // Specific implementations to get the memory-type based on a template argument +template <> const Precision Xher2::precision_ = Precision::kHalf; template <> const Precision Xher2::precision_ = Precision::kSingle; template <> const Precision Xher2::precision_ = Precision::kDouble; template <> const Precision Xher2::precision_ = Precision::kComplexSingle; @@ -106,6 +107,7 @@ StatusCode Xher2::DoHer2(const Layout layout, const Triangle triangle, // ================================================================================================= // Compiles the templated class +template class Xher2; template class Xher2; template class Xher2; template class Xher2; diff --git a/src/routines/level2/xspr.cc b/src/routines/level2/xspr.cc index 55af2f29..c556b920 100644 --- a/src/routines/level2/xspr.cc +++ b/src/routines/level2/xspr.cc @@ -44,6 +44,7 @@ StatusCode Xspr::DoSpr(const Layout layout, const Triangle triangle, // ================================================================================================= // Compiles the templated class +template class Xspr; template class Xspr; template class Xspr; diff --git a/src/routines/level2/xspr2.cc b/src/routines/level2/xspr2.cc index 9a3f97ce..c4ad5dc4 100644 --- a/src/routines/level2/xspr2.cc +++ b/src/routines/level2/xspr2.cc @@ -46,6 +46,7 @@ StatusCode Xspr2::DoSpr2(const Layout layout, const Triangle triangle, // ================================================================================================= // Compiles the templated class +template class Xspr2; template class Xspr2; template class Xspr2; diff --git a/src/routines/level2/xsyr.cc b/src/routines/level2/xsyr.cc index 4b3928e5..892517d7 100644 --- a/src/routines/level2/xsyr.cc +++ b/src/routines/level2/xsyr.cc @@ -43,6 +43,7 @@ StatusCode Xsyr::DoSyr(const Layout layout, const Triangle triangle, // ================================================================================================= // Compiles the templated class +template class Xsyr; template class Xsyr; template class Xsyr; diff --git a/src/routines/level2/xsyr2.cc b/src/routines/level2/xsyr2.cc index 3ae389e0..e6dfd158 100644 --- a/src/routines/level2/xsyr2.cc +++ b/src/routines/level2/xsyr2.cc @@ -45,6 +45,7 @@ StatusCode Xsyr2::DoSyr2(const Layout layout, const Triangle triangle, // ================================================================================================= // Compiles the templated class +template class Xsyr2; template class Xsyr2; template class Xsyr2; -- cgit v1.2.3